author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:49:45 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:49:45 +0000
commit    2c3c1048746a4622d8c89a29670120dc8fab93c4
tree      848558de17fb3008cdf4d861b01ac7781903ce39 /net/netfilter
parent    Initial commit.
Adding upstream version 6.1.76.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig | 1666
-rw-r--r-- net/netfilter/Makefile | 228
-rw-r--r-- net/netfilter/core.c | 793
-rw-r--r-- net/netfilter/ipset/Kconfig | 178
-rw-r--r-- net/netfilter/ipset/Makefile | 31
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h | 306
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 387
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 423
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c | 332
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 2422
-rw-r--r-- net/netfilter/ipset/ip_set_getport.c | 150
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 1582
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ip.c | 329
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipmac.c | 311
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipmark.c | 331
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipport.c | 395
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportip.c | 410
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 572
-rw-r--r-- net/netfilter/ipset/ip_set_hash_mac.c | 169
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 407
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c | 525
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netnet.c | 514
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 515
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c | 628
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 681
-rw-r--r-- net/netfilter/ipset/pfxlen.c | 189
-rw-r--r-- net/netfilter/ipvs/Kconfig | 352
-rw-r--r-- net/netfilter/ipvs/Makefile | 45
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c | 617
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 1538
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 2456
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 4288
-rw-r--r-- net/netfilter/ipvs/ip_vs_dh.c | 272
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c | 204
-rw-r--r-- net/netfilter/ipvs/ip_vs_fo.c | 74
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c | 637
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 630
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 815
-rw-r--r-- net/netfilter/ipvs/ip_vs_lc.c | 88
-rw-r--r-- net/netfilter/ipvs/ip_vs_mh.c | 539
-rw-r--r-- net/netfilter/ipvs/ip_vs_nfct.c | 280
-rw-r--r-- net/netfilter/ipvs/ip_vs_nq.c | 138
-rw-r--r-- net/netfilter/ipvs/ip_vs_ovf.c | 81
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe.c | 112
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c | 187
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c | 384
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 157
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 595
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 746
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 503
-rw-r--r-- net/netfilter/ipvs/ip_vs_rr.c | 125
-rw-r--r-- net/netfilter/ipvs/ip_vs_sched.c | 250
-rw-r--r-- net/netfilter/ipvs/ip_vs_sed.c | 139
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c | 378
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 2051
-rw-r--r-- net/netfilter/ipvs/ip_vs_twos.c | 139
-rw-r--r-- net/netfilter/ipvs/ip_vs_wlc.c | 111
-rw-r--r-- net/netfilter/ipvs/ip_vs_wrr.c | 265
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 1686
-rw-r--r-- net/netfilter/nf_conncount.c | 636
-rw-r--r-- net/netfilter/nf_conntrack_acct.c | 28
-rw-r--r-- net/netfilter/nf_conntrack_amanda.c | 240
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c | 515
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c | 84
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 2875
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c | 358
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 745
-rw-r--r-- net/netfilter/nf_conntrack_extend.c | 159
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 604
-rw-r--r-- net/netfilter/nf_conntrack_h323_asn1.c | 939
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 1803
-rw-r--r-- net/netfilter/nf_conntrack_h323_types.c | 1921
-rw-r--r-- net/netfilter/nf_conntrack_helper.c | 520
-rw-r--r-- net/netfilter/nf_conntrack_irc.c | 314
-rw-r--r-- net/netfilter/nf_conntrack_labels.c | 82
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c | 71
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 3918
-rw-r--r-- net/netfilter/nf_conntrack_pptp.c | 613
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 726
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c | 824
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 79
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c | 317
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmp.c | 383
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmpv6.c | 359
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 743
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 1616
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 322
-rw-r--r-- net/netfilter/nf_conntrack_sane.c | 213
-rw-r--r-- net/netfilter/nf_conntrack_seqadj.c | 234
-rw-r--r-- net/netfilter/nf_conntrack_sip.c | 1707
-rw-r--r-- net/netfilter/nf_conntrack_snmp.c | 75
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 1254
-rw-r--r-- net/netfilter/nf_conntrack_tftp.c | 141
-rw-r--r-- net/netfilter/nf_conntrack_timeout.c | 146
-rw-r--r-- net/netfilter/nf_conntrack_timestamp.c | 25
-rw-r--r-- net/netfilter/nf_dup_netdev.c | 94
-rw-r--r-- net/netfilter/nf_flow_table_core.c | 699
-rw-r--r-- net/netfilter/nf_flow_table_inet.c | 118
-rw-r--r-- net/netfilter/nf_flow_table_ip.c | 689
-rw-r--r-- net/netfilter/nf_flow_table_offload.c | 1241
-rw-r--r-- net/netfilter/nf_flow_table_procfs.c | 80
-rw-r--r-- net/netfilter/nf_hooks_lwtunnel.c | 53
-rw-r--r-- net/netfilter/nf_internals.h | 37
-rw-r--r-- net/netfilter/nf_log.c | 568
-rw-r--r-- net/netfilter/nf_log_syslog.c | 1083
-rw-r--r-- net/netfilter/nf_nat_amanda.c | 80
-rw-r--r-- net/netfilter/nf_nat_bpf.c | 79
-rw-r--r-- net/netfilter/nf_nat_core.c | 1184
-rw-r--r-- net/netfilter/nf_nat_ftp.c | 138
-rw-r--r-- net/netfilter/nf_nat_helper.c | 231
-rw-r--r-- net/netfilter/nf_nat_irc.c | 110
-rw-r--r-- net/netfilter/nf_nat_masquerade.c | 368
-rw-r--r-- net/netfilter/nf_nat_proto.c | 1103
-rw-r--r-- net/netfilter/nf_nat_redirect.c | 138
-rw-r--r-- net/netfilter/nf_nat_sip.c | 676
-rw-r--r-- net/netfilter/nf_nat_tftp.c | 56
-rw-r--r-- net/netfilter/nf_queue.c | 356
-rw-r--r-- net/netfilter/nf_sockopt.c | 120
-rw-r--r-- net/netfilter/nf_synproxy_core.c | 1220
-rw-r--r-- net/netfilter/nf_tables_api.c | 10973
-rw-r--r-- net/netfilter/nf_tables_core.c | 393
-rw-r--r-- net/netfilter/nf_tables_offload.c | 691
-rw-r--r-- net/netfilter/nf_tables_trace.c | 295
-rw-r--r-- net/netfilter/nfnetlink.c | 811
-rw-r--r-- net/netfilter/nfnetlink_acct.c | 560
-rw-r--r-- net/netfilter/nfnetlink_cthelper.c | 804
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c | 681
-rw-r--r-- net/netfilter/nfnetlink_hook.c | 393
-rw-r--r-- net/netfilter/nfnetlink_log.c | 1209
-rw-r--r-- net/netfilter/nfnetlink_osf.c | 450
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 1617
-rw-r--r-- net/netfilter/nft_bitwise.c | 533
-rw-r--r-- net/netfilter/nft_byteorder.c | 197
-rw-r--r-- net/netfilter/nft_chain_filter.c | 456
-rw-r--r-- net/netfilter/nft_chain_nat.c | 148
-rw-r--r-- net/netfilter/nft_chain_route.c | 169
-rw-r--r-- net/netfilter/nft_cmp.c | 433
-rw-r--r-- net/netfilter/nft_compat.c | 958
-rw-r--r-- net/netfilter/nft_connlimit.c | 303
-rw-r--r-- net/netfilter/nft_counter.c | 308
-rw-r--r-- net/netfilter/nft_ct.c | 1401
-rw-r--r-- net/netfilter/nft_dup_netdev.c | 112
-rw-r--r-- net/netfilter/nft_dynset.c | 433
-rw-r--r-- net/netfilter/nft_exthdr.c | 840
-rw-r--r-- net/netfilter/nft_fib.c | 210
-rw-r--r-- net/netfilter/nft_fib_inet.c | 80
-rw-r--r-- net/netfilter/nft_fib_netdev.c | 89
-rw-r--r-- net/netfilter/nft_flow_offload.c | 527
-rw-r--r-- net/netfilter/nft_fwd_netdev.c | 271
-rw-r--r-- net/netfilter/nft_hash.c | 286
-rw-r--r-- net/netfilter/nft_immediate.c | 355
-rw-r--r-- net/netfilter/nft_last.c | 137
-rw-r--r-- net/netfilter/nft_limit.c | 481
-rw-r--r-- net/netfilter/nft_log.c | 320
-rw-r--r-- net/netfilter/nft_lookup.c | 258
-rw-r--r-- net/netfilter/nft_masq.c | 307
-rw-r--r-- net/netfilter/nft_meta.c | 948
-rw-r--r-- net/netfilter/nft_nat.c | 408
-rw-r--r-- net/netfilter/nft_numgen.c | 255
-rw-r--r-- net/netfilter/nft_objref.c | 262
-rw-r--r-- net/netfilter/nft_osf.c | 194
-rw-r--r-- net/netfilter/nft_payload.c | 891
-rw-r--r-- net/netfilter/nft_queue.c | 248
-rw-r--r-- net/netfilter/nft_quota.c | 304
-rw-r--r-- net/netfilter/nft_range.c | 149
-rw-r--r-- net/netfilter/nft_redir.c | 272
-rw-r--r-- net/netfilter/nft_reject.c | 133
-rw-r--r-- net/netfilter/nft_reject_inet.c | 111
-rw-r--r-- net/netfilter/nft_reject_netdev.c | 191
-rw-r--r-- net/netfilter/nft_rt.c | 208
-rw-r--r-- net/netfilter/nft_set_bitmap.c | 316
-rw-r--r-- net/netfilter/nft_set_hash.c | 788
-rw-r--r-- net/netfilter/nft_set_pipapo.c | 2331
-rw-r--r-- net/netfilter/nft_set_pipapo.h | 280
-rw-r--r-- net/netfilter/nft_set_pipapo_avx2.c | 1228
-rw-r--r-- net/netfilter/nft_set_pipapo_avx2.h | 12
-rw-r--r-- net/netfilter/nft_set_rbtree.c | 773
-rw-r--r-- net/netfilter/nft_socket.c | 291
-rw-r--r-- net/netfilter/nft_synproxy.c | 397
-rw-r--r-- net/netfilter/nft_tproxy.c | 363
-rw-r--r-- net/netfilter/nft_tunnel.c | 750
-rw-r--r-- net/netfilter/nft_xfrm.c | 324
-rw-r--r-- net/netfilter/utils.c | 217
-rw-r--r-- net/netfilter/x_tables.c | 2017
-rw-r--r-- net/netfilter/xt_AUDIT.c | 158
-rw-r--r-- net/netfilter/xt_CHECKSUM.c | 87
-rw-r--r-- net/netfilter/xt_CLASSIFY.c | 70
-rw-r--r-- net/netfilter/xt_CONNSECMARK.c | 139
-rw-r--r-- net/netfilter/xt_CT.c | 405
-rw-r--r-- net/netfilter/xt_DSCP.c | 161
-rw-r--r-- net/netfilter/xt_HL.c | 159
-rw-r--r-- net/netfilter/xt_HMARK.c | 368
-rw-r--r-- net/netfilter/xt_IDLETIMER.c | 542
-rw-r--r-- net/netfilter/xt_LED.c | 202
-rw-r--r-- net/netfilter/xt_LOG.c | 119
-rw-r--r-- net/netfilter/xt_MASQUERADE.c | 128
-rw-r--r-- net/netfilter/xt_NETMAP.c | 169
-rw-r--r-- net/netfilter/xt_NFLOG.c | 90
-rw-r--r-- net/netfilter/xt_NFQUEUE.c | 158
-rw-r--r-- net/netfilter/xt_RATEEST.c | 233
-rw-r--r-- net/netfilter/xt_REDIRECT.c | 124
-rw-r--r-- net/netfilter/xt_SECMARK.c | 191
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 345
-rw-r--r-- net/netfilter/xt_TCPOPTSTRIP.c | 153
-rw-r--r-- net/netfilter/xt_TEE.c | 231
-rw-r--r-- net/netfilter/xt_TPROXY.c | 269
-rw-r--r-- net/netfilter/xt_TRACE.c | 55
-rw-r--r-- net/netfilter/xt_addrtype.c | 232
-rw-r--r-- net/netfilter/xt_bpf.c | 153
-rw-r--r-- net/netfilter/xt_cgroup.c | 219
-rw-r--r-- net/netfilter/xt_cluster.c | 175
-rw-r--r-- net/netfilter/xt_comment.c | 46
-rw-r--r-- net/netfilter/xt_connbytes.c | 157
-rw-r--r-- net/netfilter/xt_connlabel.c | 102
-rw-r--r-- net/netfilter/xt_connlimit.c | 137
-rw-r--r-- net/netfilter/xt_connmark.c | 208
-rw-r--r-- net/netfilter/xt_conntrack.c | 327
-rw-r--r-- net/netfilter/xt_cpu.c | 61
-rw-r--r-- net/netfilter/xt_dccp.c | 185
-rw-r--r-- net/netfilter/xt_devgroup.c | 79
-rw-r--r-- net/netfilter/xt_dscp.c | 110
-rw-r--r-- net/netfilter/xt_ecn.c | 176
-rw-r--r-- net/netfilter/xt_esp.c | 104
-rw-r--r-- net/netfilter/xt_hashlimit.c | 1332
-rw-r--r-- net/netfilter/xt_helper.c | 96
-rw-r--r-- net/netfilter/xt_hl.c | 93
-rw-r--r-- net/netfilter/xt_ipcomp.c | 109
-rw-r--r-- net/netfilter/xt_iprange.c | 137
-rw-r--r-- net/netfilter/xt_ipvs.c | 191
-rw-r--r-- net/netfilter/xt_l2tp.c | 355
-rw-r--r-- net/netfilter/xt_length.c | 66
-rw-r--r-- net/netfilter/xt_limit.c | 215
-rw-r--r-- net/netfilter/xt_mac.c | 63
-rw-r--r-- net/netfilter/xt_mark.c | 82
-rw-r--r-- net/netfilter/xt_multiport.c | 176
-rw-r--r-- net/netfilter/xt_nat.c | 247
-rw-r--r-- net/netfilter/xt_nfacct.c | 93
-rw-r--r-- net/netfilter/xt_osf.c | 73
-rw-r--r-- net/netfilter/xt_owner.c | 158
-rw-r--r-- net/netfilter/xt_physdev.c | 139
-rw-r--r-- net/netfilter/xt_pkttype.c | 61
-rw-r--r-- net/netfilter/xt_policy.c | 189
-rw-r--r-- net/netfilter/xt_quota.c | 92
-rw-r--r-- net/netfilter/xt_rateest.c | 153
-rw-r--r-- net/netfilter/xt_realm.c | 51
-rw-r--r-- net/netfilter/xt_recent.c | 763
-rw-r--r-- net/netfilter/xt_repldata.h | 48
-rw-r--r-- net/netfilter/xt_sctp.c | 201
-rw-r--r-- net/netfilter/xt_set.c | 712
-rw-r--r-- net/netfilter/xt_socket.c | 336
-rw-r--r-- net/netfilter/xt_state.c | 75
-rw-r--r-- net/netfilter/xt_statistic.c | 99
-rw-r--r-- net/netfilter/xt_string.c | 93
-rw-r--r-- net/netfilter/xt_tcpmss.c | 107
-rw-r--r-- net/netfilter/xt_tcpudp.c | 232
-rw-r--r-- net/netfilter/xt_time.c | 300
-rw-r--r-- net/netfilter/xt_u32.c | 145
257 files changed, 128925 insertions, 0 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
new file mode 100644
index 000000000..4b8d04640
--- /dev/null
+++ b/net/netfilter/Kconfig
@@ -0,0 +1,1666 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menu "Core Netfilter Configuration"
+ depends on INET && NETFILTER
+
+config NETFILTER_INGRESS
+ bool "Netfilter ingress support"
+ default y
+ select NET_INGRESS
+ help
+ This allows you to classify packets from ingress using the Netfilter
+ infrastructure.
+
+config NETFILTER_EGRESS
+ bool "Netfilter egress support"
+ default y
+ select NET_EGRESS
+ help
+ This allows you to classify packets before transmission using the
+ Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+ def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
+config NETFILTER_NETLINK
+ tristate
+
+config NETFILTER_FAMILY_BRIDGE
+ bool
+
+config NETFILTER_FAMILY_ARP
+ bool
+
+config NETFILTER_NETLINK_HOOK
+ tristate "Netfilter base hook dump support"
+ depends on NETFILTER_ADVANCED
+ depends on NF_TABLES
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ to list the base netfilter hooks via NFNETLINK.
+ This is helpful for debugging.
+
+config NETFILTER_NETLINK_ACCT
+ tristate "Netfilter NFACCT over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for extended accounting via NFNETLINK.
+
+config NETFILTER_NETLINK_QUEUE
+ tristate "Netfilter NFQUEUE over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for queueing packets via NFNETLINK.
+
+config NETFILTER_NETLINK_LOG
+ tristate "Netfilter LOG over NFNETLINK interface"
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for logging packets via NFNETLINK.
+
+ This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms,
+ and is also scheduled to replace the old syslog-based ipt_LOG
+ and ip6t_LOG modules.
+
+config NETFILTER_NETLINK_OSF
+ tristate "Netfilter OSF over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for passive OS fingerprint via NFNETLINK.
+
+config NF_CONNTRACK
+ tristate "Netfilter connection tracking support"
+ default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IPV6 != n
+ help
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is required to do Masquerading or other kinds of Network
+ Address Translation. It can also be used to enhance packet
+ filtering (see `Connection state match support' below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_LOG_SYSLOG
+ tristate "Syslog packet logging"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option enables support for packet logging via syslog.
+ It supports IPv4, IPv6, ARP and common transport protocols such
+ as TCP and UDP.
+ This is a simpler but less flexible logging method compared to
+ CONFIG_NETFILTER_NETLINK_LOG.
+ If both are enabled the backend to use can be configured at run-time
+ by means of per-address-family sysctl tunables.
+
+if NF_CONNTRACK
+config NETFILTER_CONNCOUNT
+ tristate
+
+config NF_CONNTRACK_MARK
+ bool 'Connection mark tracking support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection marks, used by the
+ `CONNMARK' target and `connmark' match. Similar to the mark value
+ of packets, but this mark value is kept in the conntrack session
+ instead of the individual packets.
+
+config NF_CONNTRACK_SECMARK
+ bool 'Connection tracking security mark support'
+ depends on NETWORK_SECMARK
+ default y if NETFILTER_ADVANCED=n
+ help
+ This option enables security markings to be applied to
+ connections. Typically they are copied to connections from
+ packets using the CONNSECMARK target and copied back from
+ connections to packets with the same target, with the packets
+ being originally labeled via SECMARK.
+
+ If unsure, say 'N'.
+
+config NF_CONNTRACK_ZONES
+ bool 'Connection tracking zones'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection tracking zones.
+ Normally, each connection needs to have a unique system wide
+ identity. Connection tracking zones allow you to have multiple
+ connections using the same identity, as long as they are
+ contained in different zones.
+
+ If unsure, say `N'.
+
+config NF_CONNTRACK_PROCFS
+ bool "Supply CT list in procfs (OBSOLETE)"
+ depends on PROC_FS
+ help
+ This option enables the list of known conntrack entries
+ to be shown in procfs under net/netfilter/nf_conntrack. This
+ is considered obsolete in favor of using the conntrack(8)
+ tool which uses Netlink.
+
+config NF_CONNTRACK_EVENTS
+ bool "Connection tracking events"
+ depends on NETFILTER_ADVANCED
+ help
+ If this option is enabled, the connection tracking code will
+ provide a notifier chain that can be used by other kernel code
+ to get notified about changes in the connection tracking state.
+
+ If unsure, say `N'.
+
+config NF_CONNTRACK_TIMEOUT
+ bool 'Connection tracking timeout'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection tracking timeout
+ extension. This allows you to attach timeout policies to flows
+ via the CT target.
+
+ If unsure, say `N'.
+
+config NF_CONNTRACK_TIMESTAMP
+ bool 'Connection tracking timestamping'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection tracking timestamping.
+ This allows you to store the flow start-time and to obtain
+ the flow-stop time (once it has been destroyed) via Connection
+ tracking events.
+
+ If unsure, say `N'.
+
+config NF_CONNTRACK_LABELS
+ bool "Connection tracking labels"
+ help
+ This option enables support for assigning user-defined flag bits
+ to connection tracking entries. It can be used with xtables connlabel
+ match and the nftables ct expression.
+
+config NF_CT_PROTO_DCCP
+ bool 'DCCP protocol connection tracking support'
+ depends on NETFILTER_ADVANCED
+ default y
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on DCCP connections.
+
+ If unsure, say Y.
+
+config NF_CT_PROTO_GRE
+ bool
+
+config NF_CT_PROTO_SCTP
+ bool 'SCTP protocol connection tracking support'
+ depends on NETFILTER_ADVANCED
+ default y
+ select LIBCRC32C
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on SCTP connections.
+
+ If unsure, say Y.
+
+config NF_CT_PROTO_UDPLITE
+ bool 'UDP-Lite protocol connection tracking support'
+ depends on NETFILTER_ADVANCED
+ default y
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on UDP-Lite
+ connections.
+
+ If unsure, say Y.
+
+config NF_CONNTRACK_AMANDA
+ tristate "Amanda backup protocol support"
+ depends on NETFILTER_ADVANCED
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ help
+ If you are running the Amanda backup package <http://www.amanda.org/>
+ on this machine or machines that will be MASQUERADED through this
+ machine, then you may want to enable this feature. This allows the
+ connection tracking and NAT code to allow the sub-channels that
+ Amanda requires for communication of the backup data, messages and
+ index.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_FTP
+ tristate "FTP protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Tracking FTP connections is problematic: special helpers are
+ required for tracking them, and doing masquerading and other forms
+ of Network Address Translation on them.
+
+ This is FTP support on Layer 3 independent connection tracking.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_H323
+ tristate "H.323 protocol support"
+ depends on IPV6 || IPV6=n
+ depends on NETFILTER_ADVANCED
+ help
+ H.323 is a VoIP signalling protocol from ITU-T. As one of the most
+ important VoIP protocols, it is widely used by voice hardware and
+ software including voice gateways, IP phones, Netmeeting, OpenPhone,
+ Gnomemeeting, etc.
+
+ With this module you can support H.323 on a connection tracking/NAT
+ firewall.
+
+ This module supports RAS, Fast Start, H.245 Tunnelling, Call
+ Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
+ whiteboard, file transfer, etc. For more information, please
+ visit http://nath323.sourceforge.net/.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_IRC
+ tristate "IRC protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ There is a commonly-used extension to IRC called
+ Direct Client-to-Client Protocol (DCC). This enables users to send
+ files to each other, and also chat to each other without the need
+ of a server. DCC Sending is used anywhere you send files over IRC,
+ and DCC Chat is most commonly used by Eggdrop bots. If you are
+ using NAT, this extension will enable you to send files and initiate
+ chats. Note that you do NOT need this extension to get files or
+ have others initiate chats, or for anything else in IRC.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_BROADCAST
+ tristate
+
+config NF_CONNTRACK_NETBIOS_NS
+ tristate "NetBIOS name service protocol support"
+ select NF_CONNTRACK_BROADCAST
+ help
+ NetBIOS name service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This makes them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating NetBIOS name service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address. When properly configured, the output
+ of "ip address show" should look similar to this:
+
+ $ ip -4 address show eth0
+ 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
+ inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SNMP
+ tristate "SNMP service protocol support"
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_BROADCAST
+ help
+ SNMP service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This makes them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating SNMP service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_PPTP
+ tristate "PPtP protocol support"
+ depends on NETFILTER_ADVANCED
+ select NF_CT_PROTO_GRE
+ help
+ This module adds support for PPTP (Point to Point Tunnelling
+ Protocol, RFC2637) connection tracking and NAT.
+
+ If you are running PPTP sessions over a stateful firewall or NAT
+ box, you may want to enable this feature.
+
+ Please note that not all PPTP modes of operation are supported yet.
+ Specifically these limitations exist:
+ - Blindly assumes that control connections are always established
+ in PNS->PAC direction. This is a violation of RFC2637.
+ - Only supports a single call within each session
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SANE
+ tristate "SANE protocol support"
+ depends on NETFILTER_ADVANCED
+ help
+ SANE is a protocol for remote access to scanners as implemented
+ by the 'saned' daemon. Like FTP, it uses separate control and
+ data connections.
+
+ With this module you can support SANE on a connection tracking
+ firewall.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SIP
+ tristate "SIP protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ SIP is an application-layer control protocol that can establish,
+ modify, and terminate multimedia sessions (conferences) such as
+ Internet telephony calls. With the nf_conntrack_sip and
+ the nf_nat_sip modules you can support the protocol on a connection
+ tracking/NATing firewall.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_TFTP
+ tristate "TFTP protocol support"
+ depends on NETFILTER_ADVANCED
+ help
+ TFTP connection tracking helper; whether it is required depends
+ on how restrictive your ruleset is.
+ If you are using a tftp client behind -j SNAT or -j MASQUERADE,
+ you will need this.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CT_NETLINK
+ tristate 'Connection tracking netlink interface'
+ select NETFILTER_NETLINK
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option enables support for a netlink-based userspace interface.
+
+config NF_CT_NETLINK_TIMEOUT
+ tristate 'Connection tracking timeout tuning via Netlink'
+ select NETFILTER_NETLINK
+ depends on NETFILTER_ADVANCED
+ depends on NF_CONNTRACK_TIMEOUT
+ help
+ This option enables support for connection tracking timeout
+ fine-grain tuning. This allows you to attach specific timeout
+ policies to flows, instead of using the global timeout policy.
+
+ If unsure, say `N'.
+
+config NF_CT_NETLINK_HELPER
+ tristate 'Connection tracking helpers in user-space via Netlink'
+ select NETFILTER_NETLINK
+ depends on NF_CT_NETLINK
+ depends on NETFILTER_NETLINK_QUEUE
+ depends on NETFILTER_NETLINK_GLUE_CT
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables the user-space connection tracking helpers
+ infrastructure.
+
+ If unsure, say `N'.
+
+config NETFILTER_NETLINK_GLUE_CT
+ bool "NFQUEUE and NFLOG integration with Connection Tracking"
+ default n
+ depends on (NETFILTER_NETLINK_QUEUE || NETFILTER_NETLINK_LOG) && NF_CT_NETLINK
+ help
+ If this option is enabled, NFQUEUE and NFLOG can include
+ Connection Tracking information together with the packet that
+ is enqueued via NFNETLINK.
+
+config NF_NAT
+ tristate "Network Address Translation support"
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables, ip6tables or nft.
+
+config NF_NAT_AMANDA
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_AMANDA
+
+config NF_NAT_FTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_FTP
+
+config NF_NAT_IRC
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_IRC
+
+config NF_NAT_SIP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_SIP
+
+config NF_NAT_TFTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_TFTP
+
+config NF_NAT_REDIRECT
+ bool
+
+config NF_NAT_MASQUERADE
+ bool
+
+config NETFILTER_SYNPROXY
+ tristate
+
+endif # NF_CONNTRACK
+
+config NF_TABLES
+ select NETFILTER_NETLINK
+ select LIBCRC32C
+ tristate "Netfilter nf_tables support"
+ help
+ nftables is the new packet classification framework that intends to
+ replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
+ provides a pseudo-state machine with an extensible instruction-set
+ (also known as expressions) that the userspace 'nft' utility
+ (https://www.netfilter.org/projects/nftables) uses to build the
+ rule-set. It also comes with the generic set infrastructure that
+ allows you to construct mappings between matches and actions
+ for high-performance lookups.
+
+ To compile it as a module, choose M here.
+
+if NF_TABLES
+config NF_TABLES_INET
+ depends on IPV6
+ select NF_TABLES_IPV4
+ select NF_TABLES_IPV6
+ bool "Netfilter nf_tables mixed IPv4/IPv6 tables support"
+ help
+ This option enables support for a mixed IPv4/IPv6 "inet" table.
+
+config NF_TABLES_NETDEV
+ bool "Netfilter nf_tables netdev tables support"
+ help
+ This option enables support for the "netdev" table.
+
+config NFT_NUMGEN
+ tristate "Netfilter nf_tables number generator module"
+ help
+ This option adds the number generator expression used to perform
+ incremental counting and random numbers bound to a upper limit.
+
+config NFT_CT
+ depends on NF_CONNTRACK
+ tristate "Netfilter nf_tables conntrack module"
+ help
+ This option adds the "ct" expression that you can use to match
+ connection tracking information such as the flow state.
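+
+ For illustration only (an editorial sketch, not part of the upstream
+ help text), a typical stateful rule using the "ct" expression:
+
+   nft add rule inet filter input ct state established,related accept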
+
+config NFT_FLOW_OFFLOAD
+ depends on NF_CONNTRACK && NF_FLOW_TABLE
+ tristate "Netfilter nf_tables hardware flow offload module"
+ help
+ This option adds the "flow_offload" expression that you can use to
+ choose what flows are placed into the hardware.
+
+config NFT_CONNLIMIT
+ tristate "Netfilter nf_tables connlimit module"
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_CONNCOUNT
+ help
+ This option adds the "connlimit" expression that you can use to
+ rate-limit rule matches per connection.
+
+config NFT_LOG
+ tristate "Netfilter nf_tables log module"
+ help
+ This option adds the "log" expression that you can use to log
+ packets matching some criteria.
+
+config NFT_LIMIT
+ tristate "Netfilter nf_tables limit module"
+ help
+ This option adds the "limit" expression that you can use to
+ rate-limit rule matches.
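+
+ For illustration only (an editorial sketch; table and chain names are
+ placeholders), a rule that rate-limits echo requests:
+
+   nft add rule inet filter input icmp type echo-request limit rate 10/second accept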
+
+config NFT_MASQ
+ depends on NF_CONNTRACK
+ depends on NF_NAT
+ select NF_NAT_MASQUERADE
+ tristate "Netfilter nf_tables masquerade support"
+ help
+ This option adds the "masquerade" expression that you can use
+ to perform NAT in the masquerade flavour.
+
+config NFT_REDIR
+ depends on NF_CONNTRACK
+ depends on NF_NAT
+ tristate "Netfilter nf_tables redirect support"
+ select NF_NAT_REDIRECT
+ help
+ This option adds the "redirect" expression that you can use
+ to perform NAT in the redirect flavour.
+
+config NFT_NAT
+ depends on NF_CONNTRACK
+ select NF_NAT
+ depends on NF_TABLES_IPV4 || NF_TABLES_IPV6
+ tristate "Netfilter nf_tables nat module"
+ help
+ This option adds the "nat" expression that you can use to perform
+ typical Network Address Translation (NAT) packet transformations.
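+
+ For illustration only (an editorial sketch; the interface name and the
+ 192.0.2.1 documentation address are placeholders):
+
+   nft add rule ip nat postrouting oifname "eth0" snat to 192.0.2.1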
+
+config NFT_TUNNEL
+ tristate "Netfilter nf_tables tunnel module"
+ help
+ This option adds the "tunnel" expression that you can use to set
+ tunneling policies.
+
+config NFT_OBJREF
+ tristate "Netfilter nf_tables stateful object reference module"
+ help
+ This option adds the "objref" expression that allows you to refer to
+ stateful objects, such as counters and quotas.
+
+config NFT_QUEUE
+ depends on NETFILTER_NETLINK_QUEUE
+ tristate "Netfilter nf_tables queue module"
+ help
+ This is required if you intend to use the userspace queueing
+ infrastructure (also known as NFQUEUE) from nftables.
+
+config NFT_QUOTA
+ tristate "Netfilter nf_tables quota module"
+ help
+ This option adds the "quota" expression that you can use to match
+ enforce bytes quotas.
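+
+ For illustration only (an editorial sketch, not part of the upstream
+ help text):
+
+   nft add rule inet filter forward quota over 500 mbytes drop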
+
+config NFT_REJECT
+ default m if NETFILTER_ADVANCED=n
+ tristate "Netfilter nf_tables reject support"
+ depends on !NF_TABLES_INET || (IPV6!=m || m)
+ help
+ This option adds the "reject" expression that you can use to
+ explicitly deny disallowed traffic and to notify the sender via
+ TCP reset or ICMP informational errors.
+
+config NFT_REJECT_INET
+ depends on NF_TABLES_INET
+ default NFT_REJECT
+ tristate
+
+config NFT_COMPAT
+ depends on NETFILTER_XTABLES
+ tristate "Netfilter x_tables over nf_tables module"
+ help
+ This is required if you intend to use any of the existing
+ x_tables match/target extensions over the nf_tables
+ framework.
+
+config NFT_HASH
+ tristate "Netfilter nf_tables hash module"
+ help
+ This option adds the "hash" expression that you can use to perform
+ a hash operation on registers.
+
+config NFT_FIB
+ tristate
+
+config NFT_FIB_INET
+ depends on NF_TABLES_INET
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables fib inet support"
+ help
+ This option allows using the FIB expression from the inet table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
+config NFT_XFRM
+ tristate "Netfilter nf_tables xfrm/IPSec security association matching"
+ depends on XFRM
+ help
+ This option adds an expression that you can use to extract properties
+ of a packet's security association.
+
+config NFT_SOCKET
+ tristate "Netfilter nf_tables socket match support"
+ depends on IPV6 || IPV6=n
+ select NF_SOCKET_IPV4
+ select NF_SOCKET_IPV6 if NF_TABLES_IPV6
+ help
+ This option allows matching for the presence or absence of a
+ corresponding socket and its attributes.
+
+config NFT_OSF
+ tristate "Netfilter nf_tables passive OS fingerprint support"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_OSF
+ help
+ This option allows matching packets from a specific OS.
+
+config NFT_TPROXY
+ tristate "Netfilter nf_tables tproxy support"
+ depends on IPV6 || IPV6=n
+ select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if NF_TABLES_IPV6
+ select NF_TPROXY_IPV4
+ select NF_TPROXY_IPV6 if NF_TABLES_IPV6
+ help
+ This makes transparent proxy support available in nftables.
+
+config NFT_SYNPROXY
+ tristate "Netfilter nf_tables SYNPROXY expression support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY expression allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This avoids conntrack and server resource usage
+ during SYN-flood attacks.
+
+if NF_TABLES_NETDEV
+
+config NF_DUP_NETDEV
+ tristate "Netfilter packet duplication support"
+ help
+ This option enables the generic packet duplication infrastructure
+ for Netfilter.
+
+config NFT_DUP_NETDEV
+ tristate "Netfilter nf_tables netdev packet duplication support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet duplication for the "netdev" family.
+
+config NFT_FWD_NETDEV
+ tristate "Netfilter nf_tables netdev packet forwarding support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet forwarding for the "netdev" family.
+
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
+config NFT_REJECT_NETDEV
+ depends on NFT_REJECT_IPV4
+ depends on NFT_REJECT_IPV6
+ tristate "Netfilter nf_tables netdev REJECT support"
+ help
+ This option enables the REJECT support from the netdev table.
+ The return packet generation will be delegated to the IPv4
+ or IPv6 ICMP or TCP RST implementation depending on the
+ protocol of the packet.
+
+endif # NF_TABLES_NETDEV
+
+endif # NF_TABLES
+
+config NF_FLOW_TABLE_INET
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ depends on NF_FLOW_TABLE
+ help
+ This option adds the flow table mixed IPv4/IPv6 support.
+
+ To compile it as a module, choose M here.
+
+config NF_FLOW_TABLE
+ tristate "Netfilter flow table module"
+ depends on NETFILTER_INGRESS
+ depends on NF_CONNTRACK
+ depends on NF_TABLES
+ help
+ This option adds the flow table core infrastructure.
+
+ To compile it as a module, choose M here.
+
+config NF_FLOW_TABLE_PROCFS
+ bool "Supply flow table statistics in procfs"
+ depends on NF_FLOW_TABLE
+ depends on PROC_FS
+ help
+ This option enables the flow table offload statistics
+ to be shown in procfs under net/netfilter/nf_flowtable.
+
+config NETFILTER_XTABLES
+ tristate "Netfilter Xtables support (required for ip_tables)"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This is required if you intend to use any of ip_tables,
+ ip6_tables or arp_tables.
+
+if NETFILTER_XTABLES
+
+config NETFILTER_XTABLES_COMPAT
+ bool "Netfilter Xtables 32bit support"
+ depends on COMPAT
+ default y
+ help
+ This option provides a translation layer to run 32-bit arp, ip(6),
+ and ebtables binaries on 64-bit kernels.
+
+ If unsure, say N.
+
+comment "Xtables combined modules"
+
+config NETFILTER_XT_MARK
+ tristate 'nfmark target and match support'
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds the "MARK" target and "mark" match.
+
+ Netfilter mark matching allows you to match packets based on the
+ "nfmark" value in the packet.
+ The target allows you to create rules in the "mangle" table which alter
+ the netfilter mark (nfmark) field associated with the packet.
+
+ Prior to routing, the nfmark can influence the routing method and can
+ also be used by other subsystems to change their behavior.
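+
+ For illustration only (an editorial sketch; port and mark values are
+ placeholders), marking packets and matching on the mark later:
+
+   iptables -t mangle -A PREROUTING -p tcp --dport 80 -j MARK --set-mark 1
+   iptables -A FORWARD -m mark --mark 1 -j ACCEPT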
+
+config NETFILTER_XT_CONNMARK
+ tristate 'ctmark target and match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
+ help
+ This option adds the "CONNMARK" target and "connmark" match.
+
+ Netfilter allows you to store a mark value per connection (a.k.a.
+ ctmark), similarly to the packet mark (nfmark). Using this
+ target and match, you can set and match on this mark.
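+
+ For illustration only (an editorial sketch; port and mark values are
+ placeholders), setting a ctmark and matching it on later packets:
+
+   iptables -t mangle -A PREROUTING -p tcp --dport 443 -j CONNMARK --set-mark 2
+   iptables -A FORWARD -m connmark --mark 2 -j ACCEPT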
+
+config NETFILTER_XT_SET
+ tristate 'set target and match support'
+ depends on IP_SET
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds the "SET" target and "set" match.
+
+ Using this target and match, you can add/delete and match
+ elements in the sets created by ipset(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# alphabetically ordered list of targets
+
+comment "Xtables targets"
+
+config NETFILTER_XT_TARGET_AUDIT
+ tristate "AUDIT target support"
+ depends on AUDIT
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds an 'AUDIT' target, which can be used to create
+ audit records for packets dropped/accepted.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_CHECKSUM
+ tristate "CHECKSUM target support"
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `CHECKSUM' target, which can be used in the iptables mangle
+ table to work around buggy DHCP clients in virtualized environments.
+
+ Some old DHCP clients drop packets because they are not aware
+ that the checksum would normally be offloaded to hardware and
+ thus should be considered valid.
+ This target can be used to fill in the checksum using iptables
+ when such packets are sent via a virtual network device.
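+
+ For illustration only (an editorial sketch), filling in the checksums
+ of outgoing DHCP replies:
+
+   iptables -t mangle -A POSTROUTING -p udp --dport 68 -j CHECKSUM --checksum-fill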
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_CLASSIFY
+ tristate '"CLASSIFY" target support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `CLASSIFY' target, which enables the user to set
+ the priority of a packet. Some qdiscs can use this value for
+ classification, among these are:
+
+ atm, cbq, dsmark, pfifo_fast, htb, prio
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_CONNMARK
+ tristate '"CONNMARK" target support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_CONNMARK
+ help
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
+
+config NETFILTER_XT_TARGET_CONNSECMARK
+ tristate '"CONNSECMARK" target support'
+ depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The CONNSECMARK target copies security markings from packets
+ to connections, and restores security markings from connections
+ to packets (if the packets are not already marked). This would
+ normally be used in conjunction with the SECMARK target.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_CT
+ tristate '"CT" target support'
+ depends on NF_CONNTRACK
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `CT' target, which allows you to specify initial
+ connection tracking parameters like events to be delivered and
+ the helper to be used.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_DSCP
+ tristate '"DSCP" and "TOS" target support'
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `DSCP' target, which allows you to manipulate
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ It also adds the "TOS" target, which allows you to create rules in
+ the "mangle" table which alter the Type Of Service field of an IPv4
+ or the Priority field of an IPv6 packet, prior to routing.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_HL
+ tristate '"HL" hoplimit target support'
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
+ targets, which enable the user to change the
+ hoplimit/time-to-live value of the IP header.
+
+ While it is safe to decrement the hoplimit/TTL value, the
+ modules also allow you to increment and set the hoplimit value of
+ the header to arbitrary values. This is EXTREMELY DANGEROUS
+ since you can easily create immortal packets that loop
+ forever on the network.
+
+config NETFILTER_XT_TARGET_HMARK
+ tristate '"HMARK" target support'
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds the "HMARK" target.
+
+ The target allows you to create rules in the "raw" and "mangle" tables
+ which set the skbuff mark by means of hash calculation within a given
+ range. The nfmark can influence the routing method and can also be used
+ by other subsystems to change their behaviour.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_IDLETIMER
+ tristate "IDLETIMER target support"
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds the `IDLETIMER' target. Each matching packet
+ resets the timer associated with the label specified when the rule is
+ added. When the timer expires, it triggers a sysfs notification.
+ The remaining time for expiration can be read via sysfs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_LED
+ tristate '"LED" target support'
+ depends on LEDS_CLASS && LEDS_TRIGGERS
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `LED' target, which allows you to blink LEDs in
+ response to particular packets passing through your machine.
+
+ This can be used to turn a spare LED into a network activity LED,
+ which only flashes in response to FTP transfers, for example. Or
+ you could have an LED which lights up for a minute or two every time
+ somebody connects to your machine via SSH.
+
+ You will need support for the "led" class to make this work.
+
+ To create an LED trigger for incoming SSH traffic:
+ iptables -A INPUT -p tcp --dport 22 -j LED --led-trigger-id ssh --led-delay 1000
+
+ Then attach the new trigger to an LED on your system:
+ echo netfilter-ssh > /sys/class/leds/<ledname>/trigger
+
+ For more information on the LEDs available on your system, see
+ Documentation/leds/leds-class.rst
+
+config NETFILTER_XT_TARGET_LOG
+ tristate "LOG target support"
+ select NF_LOG_SYSLOG
+ select NF_LOG_IPV6 if IP6_NF_IPTABLES
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `LOG' target, which allows you to create rules in
+ any iptables table which records the packet header to the syslog.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_MARK
+ tristate '"MARK" target support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MARK
+ help
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
+
+config NETFILTER_XT_NAT
+ tristate '"SNAT and DNAT" targets support'
+ depends on NF_NAT
+ help
+ This option enables the SNAT and DNAT targets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NETMAP
+ tristate '"NETMAP" target support'
+ depends on NF_NAT
+ help
+ NETMAP is an implementation of static 1:1 NAT mapping of network
+ addresses. It maps the network address part, while keeping the host
+ address part intact.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NFLOG
+ tristate '"NFLOG" target support'
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_NETLINK_LOG
+ help
+ This option enables the NFLOG target, which allows you to log
+ messages through nfnetlink_log.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NFQUEUE
+ tristate '"NFQUEUE" target Support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_QUEUE
+ help
+ This target replaced the old obsolete QUEUE target.
+
+ As opposed to QUEUE, it supports 65535 different queues,
+ not just one.
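+
+ For illustration only (an editorial sketch; the queue number and port
+ are placeholders):
+
+   iptables -A INPUT -p tcp --dport 80 -j NFQUEUE --queue-num 0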
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NOTRACK
+ tristate '"NOTRACK" target support (DEPRECATED)'
+ depends on NF_CONNTRACK
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_CT
+
+config NETFILTER_XT_TARGET_RATEEST
+ tristate '"RATEEST" target support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `RATEEST' target, which allows to measure
+ rates similar to TC estimators. The `rateest' match can be
+ used to match on the measured rates.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on NF_NAT
+ select NF_NAT_REDIRECT
+ help
+ REDIRECT is a special case of NAT: all incoming connections are
+ mapped onto the incoming interface's address, causing the packets to
+ come to the local machine instead of passing through. This is
+ useful for transparent proxies.
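+
+ For illustration only (an editorial sketch; interface and ports are
+ placeholders), redirecting HTTP to a local proxy on port 3128:
+
+   iptables -t nat -A PREROUTING -i eth0 -p tcp --dport 80 -j REDIRECT --to-ports 3128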
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ depends on NF_NAT
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT_MASQUERADE
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
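+
+ For illustration only (an editorial sketch; the interface name is a
+ placeholder):
+
+   iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE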
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TEE
+ tristate '"TEE" - packet cloning to alternate destination'
+ depends on NETFILTER_ADVANCED
+ depends on IPV6 || IPV6=n
+ depends on !NF_CONNTRACK || NF_CONNTRACK
+ depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
+ select NF_DUP_IPV4
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES
+ help
+ This option adds a "TEE" target with which a packet can be cloned and
+ the clone rerouted to another nexthop.
+
+config NETFILTER_XT_TARGET_TPROXY
+ tristate '"TPROXY" target transparent proxying support'
+ depends on NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ depends on IP_NF_MANGLE
+ select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
+ select NF_TPROXY_IPV4
+ select NF_TPROXY_IPV6 if IP6_NF_IPTABLES
+ help
+ This option adds a `TPROXY' target, which is somewhat similar to
+ REDIRECT. It can only be used in the mangle table and is useful
+ to redirect traffic to a transparent proxy. It does _not_ depend
+ on Netfilter connection tracking and NAT, unlike REDIRECT.
+ For it to work you will have to configure certain iptables rules
+ and use policy routing. For more information on how to set it up
+ see Documentation/networking/tproxy.rst.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TRACE
+ tristate '"TRACE" target support'
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NETFILTER_ADVANCED
+ help
+ The TRACE target allows you to mark packets so that the kernel
+ will log every rule which matches the packets as they traverse
+ the tables, chains, and rules.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_TARGET_SECMARK
+ tristate '"SECMARK" target support'
+ depends on NETWORK_SECMARK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The SECMARK target allows security marking of network
+ packets, for use with security subsystems.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TCPMSS
+ tristate '"TCPMSS" target support'
+ depends on IPV6 || IPV6=n
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `TCPMSS' target, which allows you to alter the
+ MSS value of TCP SYN packets, to control the maximum size for that
+ connection (usually limiting it to your outgoing interface's MTU
+ minus 40).
+
+ This is used to overcome criminally braindead ISPs or servers which
+ block ICMP Fragmentation Needed packets. The symptoms of this
+ problem are that everything works fine from your Linux
+ firewall/router, but machines behind it can never exchange large
+ packets:
+ 1) Web browsers connect, then hang with no data received.
+ 2) Small mail works fine, but large emails hang.
+ 3) ssh works fine, but scp hangs after initial handshaking.
+
+ Workaround: activate this option and add a rule to your firewall
+ configuration like:
+
+ iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+ -j TCPMSS --clamp-mss-to-pmtu
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TCPOPTSTRIP
+ tristate '"TCPOPTSTRIP" target support'
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a "TCPOPTSTRIP" target, which allows you to strip
+ TCP options from TCP packets.
+
+# alphabetically ordered list of matches
+
+comment "Xtables matches"
+
+config NETFILTER_XT_MATCH_ADDRTYPE
+ tristate '"addrtype" address type match support'
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option allows you to match what routing thinks of an address,
+ e.g. UNICAST, LOCAL, BROADCAST, ...
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_BPF
+ tristate '"bpf" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ BPF matching applies a linux socket filter to each packet and
+ accepts those for which the filter returns non-zero.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_CGROUP
+ tristate '"control group" match support'
+ depends on NETFILTER_ADVANCED
+ depends on CGROUPS
+ select CGROUP_NET_CLASSID
+ help
+ Socket/process control group matching allows you to match locally
+ generated packets based on which net_cls control group processes
+ belong to.
+
+config NETFILTER_XT_MATCH_CLUSTER
+ tristate '"cluster" match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to build work-load-sharing clusters of
+ network servers/stateful firewalls without having a dedicated
+ load-balancing router/server/switch. Basically, this match returns
+ true when the packet must be handled by this cluster node. Thus,
+ all nodes see all packets and this match decides which node handles
+ what packets. The work-load sharing algorithm is based on source
+ address hashing.
+
+ If you say Y or M here, try `iptables -m cluster --help` for
+ more information.
+
+config NETFILTER_XT_MATCH_COMMENT
+ tristate '"comment" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `comment' dummy-match, which allows you to put
+ comments in your iptables ruleset.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNBYTES
+ tristate '"connbytes" per-connection counter match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `connbytes' match, which allows you to match the
+ number of bytes and/or packets for each direction within a connection.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNLABEL
+ tristate '"connlabel" match support'
+ select NF_CONNTRACK_LABELS
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ This match allows you to test and assign userspace-defined label names
+ to a connection. The kernel only stores bit values - mapping
+ names to bits is done by userspace.
+
+ Unlike connmark, more than 32 flag bits may be assigned to a
+ connection simultaneously.
+
+config NETFILTER_XT_MATCH_CONNLIMIT
+ tristate '"connlimit" match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_CONNCOUNT
+ help
+ This match allows you to match against the number of parallel
+ connections to a server per client IP address (or address block).
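+
+ For illustration only (an editorial sketch), limiting parallel SSH
+ connections to 3 per client address:
+
+   iptables -A INPUT -p tcp --syn --dport 22 -m connlimit --connlimit-above 3 -j REJECT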
+
+config NETFILTER_XT_MATCH_CONNMARK
+ tristate '"connmark" connection mark match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_CONNMARK
+ help
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
+
+config NETFILTER_XT_MATCH_CONNTRACK
+ tristate '"conntrack" connection tracking match support'
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ This is a general conntrack match module, a superset of the state match.
+
+ It allows matching on additional conntrack information, which is
+ useful in complex configurations, such as NAT gateways with multiple
+ internet links or tunnels.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_CPU
+ tristate '"cpu" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ CPU matching allows you to match packets based on the CPU
+ currently handling the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_DCCP
+ tristate '"dccp" protocol match support'
+ depends on NETFILTER_ADVANCED
+ default IP_DCCP
+ help
+ With this option enabled, you will be able to use the iptables
+ `dccp' match in order to match on DCCP source/destination ports
+ and DCCP flags.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_DEVGROUP
+ tristate '"devgroup" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `devgroup' match, which allows you to match on the
+ device group a network device is assigned to.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_DSCP
+ tristate '"dscp" and "tos" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `DSCP' match, which allows you to match against
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ It will also add a "tos" match, which allows you to match packets
+ based on the Type Of Service fields of the IPv4 packet (which share
+ the same bits as DSCP).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_ECN
+ tristate '"ecn" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds an "ECN" match, which allows you to match against
+ the IPv4 and TCP header ECN fields.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_ESP
+ tristate '"esp" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of SPIs
+ inside the ESP header of IPsec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_HASHLIMIT
+ tristate '"hashlimit" match support'
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `hashlimit' match.
+
+ As opposed to `limit', this match dynamically creates a hash table
+ of limit buckets, based on your selection of source/destination
+ addresses and/or ports.
+
+ It enables you to express policies like `10kpps for any given
+ destination address' or `500pps from any given source address'
+ with a single rule.
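+
+ For illustration only (an editorial sketch; rate and hash name are
+ placeholders), dropping traffic above 10kpps per destination address:
+
+   iptables -A FORWARD -m hashlimit --hashlimit-mode dstip --hashlimit-above 10000/second --hashlimit-name pps-dst -j DROP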
+
+config NETFILTER_XT_MATCH_HELPER
+ tristate '"helper" match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ Helper matching allows you to match packets in dynamic connections
+ tracked by a conntrack helper, e.g. nf_conntrack_ftp.
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config NETFILTER_XT_MATCH_HL
+ tristate '"hl" hoplimit/TTL match support'
+ depends on NETFILTER_ADVANCED
+ help
+ HL matching allows you to match packets based on the hoplimit
+ in the IPv6 header, or the time-to-live field in the IPv4
+ header of the packet.
+
+config NETFILTER_XT_MATCH_IPCOMP
+ tristate '"ipcomp" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of CPIs (16 bits)
+ inside the IPComp header of IPsec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_IPRANGE
+ tristate '"iprange" address range match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a "iprange" match, which allows you to match based on
+ an IP address range. (Normal iptables only matches on single addresses
+ with an optional mask.)
+
+ If unsure, say M.
+
+config NETFILTER_XT_MATCH_IPVS
+ tristate '"ipvs" match support'
+ depends on IP_VS
+ depends on NETFILTER_ADVANCED
+ depends on NF_CONNTRACK
+ help
+ This option allows you to match against IPVS properties of a packet.
+
+ If unsure, say N.
+
+config NETFILTER_XT_MATCH_L2TP
+ tristate '"l2tp" match support'
+ depends on NETFILTER_ADVANCED
+ default L2TP
+ help
+ This option adds an "L2TP" match, which allows you to match against
+ L2TP protocol header fields.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_LENGTH
+ tristate '"length" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to match the length of a packet against a
+ specific value or range of values.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_LIMIT
+ tristate '"limit" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ limit matching allows you to control the rate at which a rule can be
+ matched: mainly useful in combination with the LOG target ("LOG
+ target support", below) and to avoid some Denial of Service attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_MAC
+ tristate '"mac" address match support'
+ depends on NETFILTER_ADVANCED
+ help
+ MAC matching allows you to match packets based on the source
+ Ethernet address of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_MARK
+ tristate '"mark" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MARK
+ help
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
+
+config NETFILTER_XT_MATCH_MULTIPORT
+ tristate '"multiport" Multiple port match support'
+ depends on NETFILTER_ADVANCED
+ help
+ Multiport matching allows you to match TCP or UDP packets based on
+ a series of source or destination ports: normally a rule can only
+ match a single range of ports.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_NFACCT
+ tristate '"nfacct" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_ACCT
+ help
+ This option allows you to use the extended accounting through
+ nfnetlink_acct.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_OSF
+ tristate '"osf" Passive OS fingerprint match'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_OSF
+ help
+ This option selects the Passive OS Fingerprinting match module
+ that allows you to passively match the remote operating system by
+ analyzing incoming TCP SYN packets.
+
+ Rules and loading software can be downloaded from
+ http://www.ioremap.net/projects/osf
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_OWNER
+ tristate '"owner" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ Socket owner matching allows you to match locally-generated packets
+ based on who created the socket: the user or group. It is also
+ possible to check whether a socket actually exists.
+
+config NETFILTER_XT_MATCH_POLICY
+ tristate 'IPsec "policy" match support'
+ depends on XFRM
+ default m if NETFILTER_ADVANCED=n
+ help
+ Policy matching allows you to match packets based on the
+ IPsec policy that was used during decapsulation/will
+ be used during encapsulation.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_PHYSDEV
+ tristate '"physdev" match support'
+ depends on BRIDGE && BRIDGE_NETFILTER
+ depends on NETFILTER_ADVANCED
+ help
+ Physdev packet matching matches against the physical bridge ports
+ the IP packet arrived on or will leave by.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_PKTTYPE
+ tristate '"pkttype" packet type match support'
+ depends on NETFILTER_ADVANCED
+ help
+ Packet type matching allows you to match a packet by
+ its "class", e.g. BROADCAST, MULTICAST, ...
+
+ Typical usage:
+ iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_QUOTA
+ tristate '"quota" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `quota' match, which allows you to match on a
+ byte counter.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_RATEEST
+ tristate '"rateest" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_RATEEST
+ help
+ This option adds a `rateest' match, which allows you to match on the
+ rate estimated by the RATEEST target.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_REALM
+ tristate '"realm" match support'
+ depends on NETFILTER_ADVANCED
+ select IP_ROUTE_CLASSID
+ help
+ This option adds a `realm' match, which allows you to use the realm
+ key from the routing subsystem inside iptables.
+
+ This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
+ in the tc world.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_RECENT
+ tristate '"recent" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match is used for creating one or more lists of recently
+ used addresses and then matching against those lists.
+
+ Short options are available by using 'iptables -m recent -h'.
+ Official Website: <http://snowman.net/projects/ipt_recent/>
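+
+ A sketch of typical usage (the list name "badguys" is arbitrary):
+ iptables -A INPUT -p tcp --dport 2222 -m recent --name badguys --set
+ iptables -A INPUT -m recent --name badguys --rcheck --seconds 60 -j DROP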
+
+config NETFILTER_XT_MATCH_SCTP
+ tristate '"sctp" protocol match support'
+ depends on NETFILTER_ADVANCED
+ default IP_SCTP
+ help
+ With this option enabled, you will be able to use the
+ `sctp' match in order to match on SCTP source/destination ports
+ and SCTP chunk types.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_SOCKET
+ tristate '"socket" match support'
+ depends on NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ select NF_SOCKET_IPV4
+ select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
+ help
+ This option adds a `socket' match, which can be used to match
+ packets for which a TCP or UDP socket lookup finds a valid socket.
+ It can be used in combination with the MARK target and policy
+ routing to implement full featured non-locally bound sockets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STATE
+ tristate '"state" match support'
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ Connection state matching allows you to match packets based on their
+ relationship to a tracked connection (ie. previous packets). This
+ is a powerful tool for packet classification.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STATISTIC
+ tristate '"statistic" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `statistic' match, which allows you to match
+ packets periodically or randomly with a given percentage.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STRING
+ tristate '"string" match support'
+ depends on NETFILTER_ADVANCED
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ select TEXTSEARCH_BM
+ select TEXTSEARCH_FSM
+ help
+ This option adds a `string' match, which allows you to look for
+ string patterns in packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_TCPMSS
+ tristate '"tcpmss" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `tcpmss' match, which allows you to examine the
+ MSS value of TCP SYN packets, which controls the maximum packet size
+ for that connection.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_TIME
+ tristate '"time" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a "time" match, which allows you to match based on
+ the packet arrival time (at the machine which netfilter is running)
+ on) or departure time/date (for locally generated packets).
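+
+ A sketch of typical usage:
+ iptables -A INPUT -m time --timestart 09:00 --timestop 17:00 \
+ --weekdays Mon,Tue,Wed,Thu,Fri -j ACCEPT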
+
+ If you say Y here, try `iptables -m time --help` for
+ more information.
+
+ If you want to compile it as a module, say M here.
+ If unsure, say N.
+
+config NETFILTER_XT_MATCH_U32
+ tristate '"u32" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ u32 allows you to extract quantities of up to 4 bytes from a packet,
+ AND them with specified masks, shift them by specified amounts and
+ test whether the results are in any of a set of specified ranges.
+ The specification of what to extract is general enough to skip over
+ headers with lengths stored in the packet, as in IP or TCP header
+ lengths.
+
+ Details and examples are in the kernel module source.
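+
+ As an illustration, this matches IPv4 packets whose total length
+ (the low 16 bits of word 0 of the IP header) is at least 256 bytes:
+ iptables -A INPUT -m u32 --u32 "0 & 0xFFFF = 0x100:0xFFFF"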
+
+endif # NETFILTER_XTABLES
+
+endmenu
+
+source "net/netfilter/ipset/Kconfig"
+
+source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
new file mode 100644
index 000000000..0f060d100
--- /dev/null
+++ b/net/netfilter/Makefile
@@ -0,0 +1,228 @@
+# SPDX-License-Identifier: GPL-2.0
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
+
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
+ nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
+ nf_conntrack_proto_icmp.o \
+ nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+
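+# ICMPv6 conntrack must be linked into nf_conntrack itself even when
+# CONFIG_IPV6=m, hence the m -> y substitution below.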
+nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
+
+obj-$(CONFIG_NETFILTER) = netfilter.o
+
+obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
+obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
+obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o
+obj-$(CONFIG_NETFILTER_NETLINK_HOOK) += nfnetlink_hook.o
+
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
+
+# netlink interface for nf_conntrack
+obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
+obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o
+obj-$(CONFIG_NF_CT_NETLINK_HELPER) += nfnetlink_cthelper.o
+
+# connection tracking helpers
+nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o
+
+obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
+obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
+obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
+obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
+obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
+obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
+obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
+obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
+obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
+
+nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
+
+obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
+
+obj-$(CONFIG_NF_NAT) += nf_nat.o
+nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
+nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
+
+ifeq ($(CONFIG_NF_NAT),m)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o
+else ifeq ($(CONFIG_NF_NAT),y)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF) += nf_nat_bpf.o
+endif
+
+# NAT helpers
+obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
+obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
+obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
+obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
+obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
+
+# SYNPROXY
+obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+
+obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
+
+# generic packet duplication from netdev family
+obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
+
+# nf_tables
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
+ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
+ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
+ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
+ nft_counter.o nft_chain_route.o nf_tables_offload.o \
+ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
+ nft_set_pipapo.o
+
+ifdef CONFIG_X86_64
+ifndef CONFIG_UML
+nf_tables-objs += nft_set_pipapo_avx2.o
+endif
+endif
+
+obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
+obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
+obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
+obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
+obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
+obj-$(CONFIG_NFT_NAT) += nft_nat.o
+obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
+obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
+obj-$(CONFIG_NFT_REJECT) += nft_reject.o
+obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
+obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o
+obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o
+obj-$(CONFIG_NFT_LOG) += nft_log.o
+obj-$(CONFIG_NFT_MASQ) += nft_masq.o
+obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_FIB) += nft_fib.o
+obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
+obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
+obj-$(CONFIG_NFT_OSF) += nft_osf.o
+obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o
+obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o
+obj-$(CONFIG_NFT_SYNPROXY) += nft_synproxy.o
+
+obj-$(CONFIG_NFT_NAT) += nft_chain_nat.o
+
+# nf_tables netdev
+obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
+obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+
+# flow table infrastructure
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
+ nf_flow_table_offload.o
+nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
+
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+
+# generic X tables
+obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
+# combos
+obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
+obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
+obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o
+
+# targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_MASQUERADE) += xt_MASQUERADE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
+
+# matches
+obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_L2TP) += xt_l2tp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
+
+# ipset
+obj-$(CONFIG_IP_SET) += ipset/
+
+# IPVS
+obj-$(CONFIG_IP_VS) += ipvs/
+
+# lwtunnel
+obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
new file mode 100644
index 000000000..55a7f72d5
--- /dev/null
+++ b/net/netfilter/core.c
@@ -0,0 +1,793 @@
+/* netfilter.c: look after the filters for various protocols.
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * This code is GPL.
+ */
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/rcupdate.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ipv6_ops);
+
+DEFINE_PER_CPU(bool, nf_skb_duplicated);
+EXPORT_SYMBOL_GPL(nf_skb_duplicated);
+
+#ifdef CONFIG_JUMP_LABEL
+struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+EXPORT_SYMBOL(nf_hooks_needed);
+#endif
+
+static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
+#define nf_entry_dereference(e) \
+ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
+
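+/* A hook blob is a single allocation laid out as: the struct
+ * nf_hook_entries header, num nf_hook_entry slots (the hot-path data),
+ * num nf_hook_ops backpointers (control plane only) and a trailing
+ * nf_hook_entries_rcu_head used to kvfree the blob after a grace period.
+ */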
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num +
+ sizeof(struct nf_hook_entries_rcu_head);
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static void __nf_hook_entries_free(struct rcu_head *h)
+{
+ struct nf_hook_entries_rcu_head *head;
+
+ head = container_of(h, struct nf_hook_entries_rcu_head, head);
+ kvfree(head->allocation);
+}
+
+static void nf_hook_entries_free(struct nf_hook_entries *e)
+{
+ struct nf_hook_entries_rcu_head *head;
+ struct nf_hook_ops **ops;
+ unsigned int num;
+
+ if (!e)
+ return;
+
+ num = e->num_hook_entries;
+ ops = nf_hook_entries_get_hook_ops(e);
+ head = (void *)&ops[num];
+ head->allocation = e;
+ call_rcu(&head->head, __nf_hook_entries_free);
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
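+ /* Copy the live (non-dummy) hooks, keeping ascending priority order;
+ * the new hook goes in front of the first existing hook whose
+ * priority is greater than or equal to reg->priority.
+ */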
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_MISC
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp,
+ const struct nf_hook_ops *reg)
+{
+ struct nf_hook_entries *new_hooks;
+ struct nf_hook_entries *p;
+
+ p = rcu_dereference_raw(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
+
+ rcu_assign_pointer(*pp, new_hooks);
+
+ BUG_ON(p == new_hooks);
+ nf_hook_entries_free(p);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw);
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @old -- current hook blob at @pp
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that just moves on to the next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate a new blob,
+ * copies the live hooks, then replaces and discards the old one.
+ *
+ * Returns the address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
+ struct nf_hook_entries __rcu **pp)
+{
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_entries *new = NULL;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (skip == 0)
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **
+nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
+ struct net_device *dev)
+{
+ switch (pf) {
+ case NFPROTO_NETDEV:
+ break;
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ case NFPROTO_ARP:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
+ return NULL;
+ return net->nf.hooks_arp + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
+ return NULL;
+ return net->nf.hooks_bridge + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_INGRESS
+ case NFPROTO_INET:
+ if (WARN_ON_ONCE(hooknum != NF_INET_INGRESS))
+ return NULL;
+ if (!dev || dev_net(dev) != net) {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+ return &dev->nf_hooks_ingress;
+#endif
+ case NFPROTO_IPV4:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv4 + hooknum;
+ case NFPROTO_IPV6:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv6 + hooknum;
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (hooknum == NF_NETDEV_INGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_ingress;
+ }
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hooknum == NF_NETDEV_EGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_egress;
+ }
+#endif
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg,
+ int hooknum)
+{
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == hooknum)
+ return -EOPNOTSUPP;
+#endif
+ if (reg->hooknum != hooknum ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg,
+ int pf)
+{
+ if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) ||
+ (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS))
+ return true;
+
+ return false;
+}
+
+static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg,
+ int pf)
+{
+ return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
+static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
+{
+#ifdef CONFIG_JUMP_LABEL
+ int hooknum;
+
+ if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) {
+ pf = NFPROTO_NETDEV;
+ hooknum = NF_NETDEV_INGRESS;
+ } else {
+ hooknum = reg->hooknum;
+ }
+ static_key_slow_inc(&nf_hooks_needed[pf][hooknum]);
+#endif
+}
+
+static void nf_static_key_dec(const struct nf_hook_ops *reg, int pf)
+{
+#ifdef CONFIG_JUMP_LABEL
+ int hooknum;
+
+ if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) {
+ pf = NFPROTO_NETDEV;
+ hooknum = NF_NETDEV_INGRESS;
+ } else {
+ hooknum = reg->hooknum;
+ }
+ static_key_slow_dec(&nf_hooks_needed[pf][hooknum]);
+#endif
+}
+
+static int __nf_register_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
+{
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
+ int err;
+
+ switch (pf) {
+ case NFPROTO_NETDEV:
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+ if (reg->hooknum == NF_NETDEV_EGRESS)
+ return -EOPNOTSUPP;
+#endif
+ if ((reg->hooknum != NF_NETDEV_INGRESS &&
+ reg->hooknum != NF_NETDEV_EGRESS) ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
+ break;
+ case NFPROTO_INET:
+ if (reg->hooknum != NF_INET_INGRESS)
+ break;
+
+ err = nf_ingress_check(net, reg, NF_INET_INGRESS);
+ if (err < 0)
+ return err;
+ break;
+ }
+
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return -EINVAL;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks)) {
+ hooks_validate(new_hooks);
+ rcu_assign_pointer(*pp, new_hooks);
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_ingress_hook(reg, pf))
+ net_inc_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_inc_egress_queue();
+#endif
+ nf_static_key_inc(reg, pf);
+
+ BUG_ON(p == new_hooks);
+ nf_hook_entries_free(p);
+ return 0;
+}
+
+/*
+ * nf_remove_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static bool nf_remove_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
+{
+ struct nf_hook_ops **orig_ops;
+ unsigned int i;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], (void *)&dummy_ops);
+ return true;
+ }
+
+ return false;
+}
+
+static void __nf_unregister_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
+{
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
+
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ if (nf_remove_net_hook(p, reg)) {
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_ingress_hook(reg, pf))
+ net_dec_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_dec_egress_queue();
+#endif
+ nf_static_key_dec(reg, pf);
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
+ }
+
+ p = __nf_hook_entries_try_shrink(p, pp);
+ mutex_unlock(&nf_hook_mutex);
+ if (!p)
+ return;
+
+ nf_queue_nf_hook_drop(net);
+ nf_hook_entries_free(p);
+}
+
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ if (reg->pf == NFPROTO_INET) {
+ if (reg->hooknum == NF_INET_INGRESS) {
+ __nf_unregister_net_hook(net, NFPROTO_INET, reg);
+ } else {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+ }
+ } else {
+ __nf_unregister_net_hook(net, reg->pf, reg);
+ }
+}
+EXPORT_SYMBOL(nf_unregister_net_hook);
+
+void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
+ const struct nf_hook_ops *reg)
+{
+ struct nf_hook_entries *p;
+
+ p = rcu_dereference_raw(*pp);
+ if (nf_remove_net_hook(p, reg)) {
+ p = __nf_hook_entries_try_shrink(p, pp);
+ nf_hook_entries_free(p);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw);
+
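+/* NFPROTO_INET is a pseudo family: except for the ingress hook, a
+ * registration is expanded onto both the IPv4 and IPv6 hook heads,
+ * with the IPv4 one unwound if the IPv6 registration fails.
+ */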
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ int err;
+
+ if (reg->pf == NFPROTO_INET) {
+ if (reg->hooknum == NF_INET_INGRESS) {
+ err = __nf_register_net_hook(net, NFPROTO_INET, reg);
+ if (err < 0)
+ return err;
+ } else {
+ err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
+ if (err < 0)
+ return err;
+
+ err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
+ if (err < 0) {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ return err;
+ }
+ }
+ } else {
+ err = __nf_register_net_hook(net, reg->pf, reg);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(nf_register_net_hook);
+
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = nf_register_net_hook(net, &reg[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ nf_unregister_net_hooks(net, reg, i);
+ return err;
+}
+EXPORT_SYMBOL(nf_register_net_hooks);
+
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int hookcount)
+{
+ unsigned int i;
+
+ for (i = 0; i < hookcount; i++)
+ nf_unregister_net_hook(net, &reg[i]);
+}
+EXPORT_SYMBOL(nf_unregister_net_hooks);
+
+/* Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+ const struct nf_hook_entries *e, unsigned int s)
+{
+ unsigned int verdict;
+ int ret;
+
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ break;
+ case NF_DROP:
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_NETFILTER_DROP);
+ ret = NF_DROP_GETERR(verdict);
+ if (ret == 0)
+ ret = -EPERM;
+ return ret;
+ case NF_QUEUE:
+ ret = nf_queue(skb, state, s, verdict);
+ if (ret == 1)
+ continue;
+ return ret;
+ default:
+ /* Implicit handling for NF_STOLEN, as well as any other
+ * non conventional verdicts.
+ */
+ return 0;
+ }
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(nf_hook_slow);
+
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+ const struct nf_hook_entries *e)
+{
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+ int ret;
+
+ INIT_LIST_HEAD(&sublist);
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
+ ret = nf_hook_slow(skb, state, e, 0);
+ if (ret == 1)
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* Put passed packets back on main list */
+ list_splice(&sublist, head);
+}
+EXPORT_SYMBOL(nf_hook_slow_list);
+
+/* This needs to be compiled in any case to avoid dependencies between the
+ * nfnetlink_queue code and nf_conntrack.
+ */
+const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfnl_ct_hook);
+
+const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_hook);
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+u8 nf_ctnetlink_has_listener;
+EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
+
+const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_hook);
+
+/* This does not belong here, but locally generated errors need it if connection
+ * tracking is in use: without this, the connection may not be in the hash table,
+ * and hence manufactured ICMP or RST packets will not be associated with it.
+ */
+void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
+{
+ const struct nf_ct_hook *ct_hook;
+
+ if (skb->_nfct) {
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ct_hook->attach(new, skb);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(nf_ct_attach);
+
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+ const struct nf_ct_hook *ct_hook;
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ct_hook->destroy(nfct);
+ rcu_read_unlock();
+
+ WARN_ON(!ct_hook);
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+{
+ const struct nf_ct_hook *ct_hook;
+ bool ret = false;
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
+/* Built-in default zone used e.g. by modules. */
+const struct nf_conntrack_zone nf_ct_zone_dflt = {
+ .id = NF_CT_DEFAULT_ZONE_ID,
+ .dir = NF_CT_DEFAULT_ZONE_DIR,
+};
+EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
+#endif /* CONFIG_NF_CONNTRACK */
+
+static void __net_init
+__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
+{
+ int h;
+
+ for (h = 0; h < max; h++)
+ RCU_INIT_POINTER(e[h], NULL);
+}
+
+static int __net_init netfilter_net_init(struct net *net)
+{
+ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
+#endif
+#ifdef CONFIG_PROC_FS
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+ net->proc_net);
+ if (!net->nf.proc_netfilter) {
+ if (!net_eq(net, &init_net))
+ pr_err("cannot create netfilter proc entry");
+
+ return -ENOMEM;
+ }
+#endif
+
+ return 0;
+}
+
+static void __net_exit netfilter_net_exit(struct net *net)
+{
+ remove_proc_entry("netfilter", net->proc_net);
+}
+
+static struct pernet_operations netfilter_net_ops = {
+ .init = netfilter_net_init,
+ .exit = netfilter_net_exit,
+};
+
+int __init netfilter_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&netfilter_net_ops);
+ if (ret < 0)
+ goto err;
+
+ ret = netfilter_log_init();
+ if (ret < 0)
+ goto err_pernet;
+
+ return 0;
+err_pernet:
+ unregister_pernet_subsys(&netfilter_net_ops);
+err:
+ return ret;
+}
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
new file mode 100644
index 000000000..3c273483d
--- /dev/null
+++ b/net/netfilter/ipset/Kconfig
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig IP_SET
+ tristate "IP set support"
+ depends on INET && NETFILTER
+ select NETFILTER_NETLINK
+ help
+ This option adds IP set support to the kernel.
+ In order to define and use the sets, you need the userspace utility
+ ipset(8). You can use the sets in netfilter via the "set" match
+ and "SET" target.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_SET
+
+config IP_SET_MAX
+ int "Maximum number of IP sets"
+ default 256
+ range 2 65534
+ depends on IP_SET
+ help
+ Here you can define the default value of the maximum number
+ of IP sets for the kernel.
+
+ The value can be overridden by the 'max_sets' module
+ parameter of the 'ip_set' module.
+
+config IP_SET_BITMAP_IP
+ tristate "bitmap:ip set support"
+ depends on IP_SET
+ help
+ This option adds the bitmap:ip set type support, by which one
+ can store IPv4 addresses (or network addresses) from a range.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_BITMAP_IPMAC
+ tristate "bitmap:ip,mac set support"
+ depends on IP_SET
+ help
+ This option adds the bitmap:ip,mac set type support, by which one
+ can store IPv4 address and (source) MAC address pairs from a range.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_BITMAP_PORT
+ tristate "bitmap:port set support"
+ depends on IP_SET
+ help
+ This option adds the bitmap:port set type support, by which one
+ can store TCP/UDP port numbers from a range.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IP
+ tristate "hash:ip set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip set type support, by which one
+ can store arbitrary IPv4 or IPv6 addresses (or network addresses)
+ in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IPMARK
+ tristate "hash:ip,mark set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mark set type support, by which one
+ can store IPv4/IPv6 address and mark pairs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IPPORT
+ tristate "hash:ip,port set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,port set type support, by which one
+ can store IPv4/IPv6 address and protocol/port pairs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IPPORTIP
+ tristate "hash:ip,port,ip set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,port,ip set type support, by which
+ one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
+ address triples in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IPPORTNET
+ tristate "hash:ip,port,net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,port,net set type support, by which
+ one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
+ network address/prefix triples in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_IPMAC
+ tristate "hash:ip,mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mac set type support, by which
+ one can store IPv4/IPv6 address and MAC (ethernet address) pairs in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_MAC
+ tristate "hash:mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:mac set type support, by which
+ one can store MAC (ethernet address) elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_NETPORTNET
+ tristate "hash:net,port,net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,port,net set type support, by which
+ one can store two IPv4/IPv6 subnets, and a protocol/port in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_NET
+ tristate "hash:net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net set type support, by which
+ one can store IPv4/IPv6 network address/prefix elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_NETNET
+ tristate "hash:net,net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,net set type support, by which
+ one can store IPv4/IPv6 network address/prefix pairs in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_NETPORT
+ tristate "hash:net,port set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,port set type support, by which
+ one can store IPv4/IPv6 network address/prefix and
+ protocol/port pairs as elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_HASH_NETIFACE
+ tristate "hash:net,iface set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,iface set type support, by which
+ one can store IPv4/IPv6 network address/prefix and
+ interface name pairs as elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_SET_LIST_SET
+ tristate "list:set set support"
+ depends on IP_SET
+ help
+ This option adds the list:set set type support. In this
+ kind of set one can store the names of other sets and it forms
+ an ordered union of the member sets.
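+
+ For example (set names are illustrative), after "ipset create a1
+ hash:ip", "ipset create a2 hash:net" and "ipset create all list:set",
+ "ipset add all a1" followed by "ipset add all a2" makes a match
+ against "all" try a1 first and then a2.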
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
new file mode 100644
index 000000000..a445a6bf4
--- /dev/null
+++ b/net/netfilter/ipset/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the ipset modules
+#
+
+ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
+
+# ipset core
+obj-$(CONFIG_IP_SET) += ip_set.o
+
+# bitmap types
+obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
+obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
+obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
+
+# hash types
+obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o
+obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
+obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
+obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
+obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
+obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
+obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
+obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
+obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o
+obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o
+
+# list types
+obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
new file mode 100644
index 000000000..26ab0e961
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+#ifndef __IP_SET_BITMAP_IP_GEN_H
+#define __IP_SET_BITMAP_IP_GEN_H
+
+#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test)
+#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled)
+#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del)
+#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list)
+#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
+#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_memsize IPSET_TOKEN(MTYPE, _memsize)
+#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
+#define mtype_head IPSET_TOKEN(MTYPE, _head)
+#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_add IPSET_TOKEN(MTYPE, _add)
+#define mtype_del IPSET_TOKEN(MTYPE, _del)
+#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype MTYPE
+
+#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
+
+static void
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
+{
+ struct mtype *map = set->data;
+
+ timer_setup(&map->gc, gc, 0);
+ mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
+}
+
+static void
+mtype_ext_cleanup(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+ u32 id;
+
+ for (id = 0; id < map->elements; id++)
+ if (test_bit(id, map->members))
+ ip_set_ext_destroy(set, get_ext(set, map, id));
+}
+
+static void
+mtype_destroy(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map->members);
+ ip_set_free(map);
+
+ set->data = NULL;
+}
+
+static void
+mtype_flush(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ bitmap_zero(map->members, map->elements);
+ set->elements = 0;
+ set->ext_size = 0;
+}
+
+/* Calculate the actual memory size of the set data */
+static size_t
+mtype_memsize(const struct mtype *map, size_t dsize)
+{
+ return sizeof(*map) + map->memsize +
+ map->elements * dsize;
+}
+
+static int
+mtype_head(struct ip_set *set, struct sk_buff *skb)
+{
+ const struct mtype *map = set->data;
+ struct nlattr *nested;
+ size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
+
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ if (mtype_do_head(skb, map) ||
+ nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+
+ return 0;
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int
+mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ void *x = get_ext(set, map, e->id);
+ int ret = mtype_do_test(e, map, set->dsize);
+
+ if (ret <= 0)
+ return ret;
+ return ip_set_match_extensions(set, ext, mext, flags, x);
+}
+
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ void *x = get_ext(set, map, e->id);
+ int ret = mtype_do_add(e, map, flags, set->dsize);
+
+ if (ret == IPSET_ADD_FAILED) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, set))) {
+ set->elements--;
+ ret = 0;
+ } else if (!(flags & IPSET_FLAG_EXIST)) {
+ set_bit(e->id, map->members);
+ return -IPSET_ERR_EXIST;
+ }
+ /* Element is re-added, cleanup extensions */
+ ip_set_ext_destroy(set, x);
+ }
+ if (ret > 0)
+ set->elements--;
+
+ if (SET_WITH_TIMEOUT(set))
+#ifdef IP_SET_BITMAP_STORED_TIMEOUT
+ mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret);
+#else
+ ip_set_timeout_set(ext_timeout(x, set), ext->timeout);
+#endif
+
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(x, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(set, ext_comment(x, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
+
+ /* Activate element */
+ set_bit(e->id, map->members);
+ set->elements++;
+
+ return 0;
+}
+
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ void *x = get_ext(set, map, e->id);
+
+ if (mtype_do_del(e, map))
+ return -IPSET_ERR_EXIST;
+
+ ip_set_ext_destroy(set, x);
+ set->elements--;
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, set)))
+ return -IPSET_ERR_EXIST;
+
+ return 0;
+}
+
+#ifndef IP_SET_BITMAP_STORED_TIMEOUT
+static bool
+mtype_is_filled(const struct mtype_elem *x)
+{
+ return true;
+}
+#endif
+
+static int
+mtype_list(const struct ip_set *set,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct mtype *map = set->data;
+ struct nlattr *adt, *nested;
+ void *x;
+ u32 id, first = cb->args[IPSET_CB_ARG0];
+ int ret = 0;
+
+ adt = nla_nest_start(skb, IPSET_ATTR_ADT);
+ if (!adt)
+ return -EMSGSIZE;
+ /* Extensions may be replaced */
+ rcu_read_lock();
+ for (; cb->args[IPSET_CB_ARG0] < map->elements;
+ cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
+ id = cb->args[IPSET_CB_ARG0];
+ x = get_ext(set, map, id);
+ if (!test_bit(id, map->members) ||
+ (SET_WITH_TIMEOUT(set) &&
+#ifdef IP_SET_BITMAP_STORED_TIMEOUT
+ mtype_is_filled(x) &&
+#endif
+ ip_set_timeout_expired(ext_timeout(x, set))))
+ continue;
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested) {
+ if (id == first) {
+ nla_nest_cancel(skb, adt);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
+ goto nla_put_failure;
+ }
+ if (mtype_do_list(skb, map, id, set->dsize))
+ goto nla_put_failure;
+ if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+ }
+ nla_nest_end(skb, adt);
+
+ /* Set listing finished */
+ cb->args[IPSET_CB_ARG0] = 0;
+
+ goto out;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nested);
+ if (unlikely(id == first)) {
+ cb->args[IPSET_CB_ARG0] = 0;
+ ret = -EMSGSIZE;
+ }
+ nla_nest_end(skb, adt);
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static void
+mtype_gc(struct timer_list *t)
+{
+ struct mtype *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
+ void *x;
+ u32 id;
+
+ /* We run parallel with other readers (test element)
+ * but adding/deleting new entries is locked out
+ */
+ spin_lock_bh(&set->lock);
+ for (id = 0; id < map->elements; id++)
+ if (mtype_gc_test(id, map, set->dsize)) {
+ x = get_ext(set, map, id);
+ if (ip_set_timeout_expired(ext_timeout(x, set))) {
+ clear_bit(id, map->members);
+ ip_set_ext_destroy(set, x);
+ set->elements--;
+ }
+ }
+ spin_unlock_bh(&set->lock);
+
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
+ add_timer(&map->gc);
+}
+
+static const struct ip_set_type_variant mtype = {
+ .kadt = mtype_kadt,
+ .uadt = mtype_uadt,
+ .adt = {
+ [IPSET_ADD] = mtype_add,
+ [IPSET_DEL] = mtype_del,
+ [IPSET_TEST] = mtype_test,
+ },
+ .destroy = mtype_destroy,
+ .flush = mtype_flush,
+ .head = mtype_head,
+ .list = mtype_list,
+ .same_set = mtype_same_set,
+};
+
+#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
new file mode 100644
index 000000000..e4fa00abd
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ * Patrick Schaaf <bof@bof.de>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_bitmap:ip");
+
+#define MTYPE bitmap_ip
+#define HOST_MASK 32
+
+/* Type structure */
+struct bitmap_ip {
+ unsigned long *members; /* the set members */
+ u32 first_ip; /* host byte order, included in range */
+ u32 last_ip; /* host byte order, included in range */
+ u32 elements; /* number of max elements in the set */
+ u32 hosts; /* number of hosts in a subnet */
+ size_t memsize; /* members size */
+ u8 netmask; /* subnet netmask */
+ struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
+ unsigned char extensions[] /* data extensions */
+ __aligned(__alignof__(u64));
+};
+
+/* ADT structure for generic function args */
+struct bitmap_ip_adt_elem {
+ u16 id;
+};
+
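+/* Map an address to its bit index in the set: e.g. with first_ip
+ * 192.168.0.0 and netmask 32 (so hosts == 1), 192.168.0.5 maps to id 5.
+ */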
+static u32
+ip_to_id(const struct bitmap_ip *m, u32 ip)
+{
+ return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
+}
+
+/* Common functions */
+
+static int
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
+ struct bitmap_ip *map, size_t dsize)
+{
+ return !!test_bit(e->id, map->members);
+}
+
+static int
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
+{
+ return !!test_bit(id, map->members);
+}
+
+static int
+bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
+ u32 flags, size_t dsize)
+{
+ return !!test_bit(e->id, map->members);
+}
+
+static int
+bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
+{
+ return !test_and_clear_bit(e->id, map->members);
+}
+
+static int
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
+ size_t dsize)
+{
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
+ htonl(map->first_ip + id * map->hosts));
+}
+
+static int
+bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
+{
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
+ (map->netmask != 32 &&
+ nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask));
+}
+
+static int
+bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct bitmap_ip *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_ip_adt_elem e = { .id = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ u32 ip;
+
+ ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
+ if (ip < map->first_ip || ip > map->last_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ e.id = ip_to_id(map, ip);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct bitmap_ip *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ u32 ip = 0, ip_to = 0;
+ struct bitmap_ip_adt_elem e = { .id = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (ip < map->first_ip || ip > map->last_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ if (adt == IPSET_TEST) {
+ e.id = ip_to_id(map, ip);
+ return adtfn(set, &e, &ext, &ext, flags);
+ }
+
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to) {
+ swap(ip, ip_to);
+ if (ip < map->first_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+ }
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ } else {
+ ip_to = ip;
+ }
+
+ if (ip_to > map->last_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ for (; !before(ip_to, ip); ip += map->hosts) {
+ e.id = ip_to_id(map, ip);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
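+
+/* Usage sketch (assuming the standard ipset(8) userspace tool):
+ *
+ *	ipset create foo bitmap:ip range 192.168.0.0/24
+ *	ipset add foo 192.168.0.10-192.168.0.20
+ *
+ * The add arrives here with IPSET_ATTR_IP and IPSET_ATTR_IP_TO set,
+ * and the loop above inserts the eleven ids one by one, treating
+ * -IPSET_ERR_EXIST as success when the -exist flag was given.
+ */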
+
+static bool
+bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+ const struct bitmap_ip *x = a->data;
+ const struct bitmap_ip *y = b->data;
+
+ return x->first_ip == y->first_ip &&
+ x->last_ip == y->last_ip &&
+ x->netmask == y->netmask &&
+ a->timeout == b->timeout &&
+ a->extensions == b->extensions;
+}
+
+/* Plain variant */
+
+struct bitmap_ip_elem {
+};
+
+#include "ip_set_bitmap_gen.h"
+
+/* Create bitmap:ip type of sets */
+
+static bool
+init_map_ip(struct ip_set *set, struct bitmap_ip *map,
+ u32 first_ip, u32 last_ip,
+ u32 elements, u32 hosts, u8 netmask)
+{
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
+ if (!map->members)
+ return false;
+ map->first_ip = first_ip;
+ map->last_ip = last_ip;
+ map->elements = elements;
+ map->hosts = hosts;
+ map->netmask = netmask;
+ set->timeout = IPSET_NO_TIMEOUT;
+
+ map->set = set;
+ set->data = map;
+ set->family = NFPROTO_IPV4;
+
+ return true;
+}
+
+static u32
+range_to_mask(u32 from, u32 to, u8 *bits)
+{
+ u32 mask = 0xFFFFFFFE;
+
+ *bits = 32;
+ while (--(*bits) > 0 && mask && (to & mask) != from)
+ mask <<= 1;
+
+ return mask;
+}
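+
+/* Example trace (illustrative): for from = 192.168.0.0 and
+ * to = 192.168.3.255 the loop shifts the mask left until
+ * (to & mask) == from holds at mask = 0xFFFFFC00, so the function
+ * returns that mask with *bits = 22: the range is exactly a /22.
+ */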
+
+static int
+bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
+{
+ struct bitmap_ip *map;
+ u32 first_ip = 0, last_ip = 0, hosts;
+ u64 elements;
+ u8 netmask = 32;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
+ if (ret)
+ return ret;
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr >= HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(first_ip, last_ip, cidr);
+ } else {
+ return -IPSET_ERR_PROTOCOL;
+ }
+
+ if (tb[IPSET_ATTR_NETMASK]) {
+ netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+ if (netmask > HOST_MASK)
+ return -IPSET_ERR_INVALID_NETMASK;
+
+ first_ip &= ip_set_hostmask(netmask);
+ last_ip |= ~ip_set_hostmask(netmask);
+ }
+
+ if (netmask == 32) {
+ hosts = 1;
+ elements = (u64)last_ip - first_ip + 1;
+ } else {
+ u8 mask_bits;
+ u32 mask;
+
+ mask = range_to_mask(first_ip, last_ip, &mask_bits);
+
+ if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) ||
+ netmask <= mask_bits)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
+ }
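+	/* Worked example (illustrative): range 192.168.0.0-192.168.3.255
+	 * is a /22 (mask_bits = 22); with netmask 26 this gives
+	 * hosts = 2 << (32 - 26 - 1) = 64 addresses per subnet and
+	 * elements = 2 << (26 - 22 - 1) = 16 bits in the bitmap.
+	 */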
+ if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+ return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+ pr_debug("hosts %u, elements %llu\n",
+ hosts, (unsigned long long)elements);
+
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
+ if (!map)
+ return -ENOMEM;
+
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+ set->variant = &bitmap_ip;
+ if (!init_map_ip(set, map, first_ip, last_ip,
+ elements, hosts, netmask)) {
+ ip_set_free(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ bitmap_ip_gc_init(set, bitmap_ip_gc);
+ }
+ return 0;
+}
+
+static struct ip_set_type bitmap_ip_type __read_mostly = {
+ .name = "bitmap:ip",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_IPV4,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = bitmap_ip_create,
+ .create_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+bitmap_ip_init(void)
+{
+ return ip_set_type_register(&bitmap_ip_type);
+}
+
+static void __exit
+bitmap_ip_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&bitmap_ip_type);
+}
+
+module_init(bitmap_ip_init);
+module_exit(bitmap_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
new file mode 100644
index 000000000..2c625e0f4
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ * Patrick Schaaf <bof@bof.de>
+ * Martin Josefsson <gandalf@wlug.westbo.se>
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_bitmap:ip,mac");
+
+#define MTYPE bitmap_ipmac
+#define HOST_MASK 32
+#define IP_SET_BITMAP_STORED_TIMEOUT
+
+enum {
+ MAC_UNSET, /* element is set, without MAC */
+ MAC_FILLED, /* element is set with MAC */
+};
+
+/* Type structure */
+struct bitmap_ipmac {
+ unsigned long *members; /* the set members */
+ u32 first_ip; /* host byte order, included in range */
+ u32 last_ip; /* host byte order, included in range */
+	u32 elements;		/* maximum number of elements in the set */
+ size_t memsize; /* members size */
+ struct timer_list gc; /* garbage collector */
+ struct ip_set *set; /* attached to this ip_set */
+ unsigned char extensions[] /* MAC + data extensions */
+ __aligned(__alignof__(u64));
+};
+
+/* ADT structure for generic function args */
+struct bitmap_ipmac_adt_elem {
+ unsigned char ether[ETH_ALEN] __aligned(2);
+ u16 id;
+ u16 add_mac;
+};
+
+struct bitmap_ipmac_elem {
+ unsigned char ether[ETH_ALEN];
+ unsigned char filled;
+} __aligned(__alignof__(u64));
+
+static u32
+ip_to_id(const struct bitmap_ipmac *m, u32 ip)
+{
+ return ip - m->first_ip;
+}
+
+#define get_elem(extensions, id, dsize) \
+ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize) \
+ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+/* Common functions */
+
+static int
+bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
+ const struct bitmap_ipmac *map, size_t dsize)
+{
+ const struct bitmap_ipmac_elem *elem;
+
+ if (!test_bit(e->id, map->members))
+ return 0;
+ elem = get_const_elem(map->extensions, e->id, dsize);
+ if (e->add_mac && elem->filled == MAC_FILLED)
+ return ether_addr_equal(e->ether, elem->ether);
+	/* Trigger the kernel to fill out the ethernet address */
+ return -EAGAIN;
+}
+
+static int
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
+{
+ const struct bitmap_ipmac_elem *elem;
+
+ if (!test_bit(id, map->members))
+ return 0;
+ elem = get_const_elem(map->extensions, id, dsize);
+	/* The timer is not started for incomplete elements */
+ return elem->filled == MAC_FILLED;
+}
+
+static int
+bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
+{
+ return elem->filled == MAC_FILLED;
+}
+
+static int
+bitmap_ipmac_add_timeout(unsigned long *timeout,
+ const struct bitmap_ipmac_adt_elem *e,
+ const struct ip_set_ext *ext, struct ip_set *set,
+ struct bitmap_ipmac *map, int mode)
+{
+ u32 t = ext->timeout;
+
+ if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
+ if (t == set->timeout)
+ /* Timeout was not specified, get stored one */
+ t = *timeout;
+ ip_set_timeout_set(timeout, t);
+ } else {
+		/* If the MAC is still unset, we store the plain timeout
+		 * value because the timer is not activated yet
+		 * and we can reuse it later when the MAC is filled out,
+		 * possibly by the kernel
+ */
+ if (e->add_mac)
+ ip_set_timeout_set(timeout, t);
+ else
+ *timeout = t;
+ }
+ return 0;
+}
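+
+/* Scenario sketch (illustrative): adding 192.168.0.5 with timeout 600
+ * but no MAC stores the plain value 600 and leaves the timer off
+ * (IPSET_ADD_STORE_PLAIN_TIMEOUT); when a matching packet later
+ * supplies the MAC, the stored 600 is reused via
+ * IPSET_ADD_START_STORED_TIMEOUT to start the real timer.
+ */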
+
+static int
+bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
+ struct bitmap_ipmac *map, u32 flags, size_t dsize)
+{
+ struct bitmap_ipmac_elem *elem;
+
+ elem = get_elem(map->extensions, e->id, dsize);
+ if (test_bit(e->id, map->members)) {
+ if (elem->filled == MAC_FILLED) {
+ if (e->add_mac &&
+ (flags & IPSET_FLAG_EXIST) &&
+ !ether_addr_equal(e->ether, elem->ether)) {
+ /* memcpy isn't atomic */
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
+ }
+ return IPSET_ADD_FAILED;
+ } else if (!e->add_mac)
+ /* Already added without ethernet address */
+ return IPSET_ADD_FAILED;
+ /* Fill the MAC address and trigger the timer activation */
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
+ elem->filled = MAC_FILLED;
+ return IPSET_ADD_START_STORED_TIMEOUT;
+ } else if (e->add_mac) {
+ /* We can store MAC too */
+ ether_addr_copy(elem->ether, e->ether);
+ elem->filled = MAC_FILLED;
+ return 0;
+ }
+ elem->filled = MAC_UNSET;
+ /* MAC is not stored yet, don't start timer */
+ return IPSET_ADD_STORE_PLAIN_TIMEOUT;
+}
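+
+/* Summary of the cases above: a new id with a MAC is stored complete
+ * (return 0); a new id without a MAC is stored with the timer off
+ * (IPSET_ADD_STORE_PLAIN_TIMEOUT); filling in the MAC of an existing
+ * entry starts the stored timer (IPSET_ADD_START_STORED_TIMEOUT);
+ * everything else is IPSET_ADD_FAILED.
+ */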
+
+static int
+bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
+ struct bitmap_ipmac *map)
+{
+ return !test_and_clear_bit(e->id, map->members);
+}
+
+static int
+bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
+ u32 id, size_t dsize)
+{
+ const struct bitmap_ipmac_elem *elem =
+ get_const_elem(map->extensions, id, dsize);
+
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
+ htonl(map->first_ip + id)) ||
+ (elem->filled == MAC_FILLED &&
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
+}
+
+static int
+bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
+{
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+}
+
+static int
+bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct bitmap_ipmac *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ u32 ip;
+
+ ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
+ if (ip < map->first_ip || ip > map->last_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ /* Backward compatibility: we don't check the second flag */
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ e.id = ip_to_id(map, ip);
+
+ if (opt->flags & IPSET_DIM_TWO_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
+ return -EINVAL;
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct bitmap_ipmac *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_ipmac_adt_elem e = { .id = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0;
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (ip < map->first_ip || ip > map->last_ip)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ e.id = ip_to_id(map, ip);
+ if (tb[IPSET_ATTR_ETHER]) {
+ if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)
+ return -IPSET_ERR_PROTOCOL;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ e.add_mac = 1;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static bool
+bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+ const struct bitmap_ipmac *x = a->data;
+ const struct bitmap_ipmac *y = b->data;
+
+ return x->first_ip == y->first_ip &&
+ x->last_ip == y->last_ip &&
+ a->timeout == b->timeout &&
+ a->extensions == b->extensions;
+}
+
+/* Plain variant */
+
+#include "ip_set_bitmap_gen.h"
+
+/* Create bitmap:ip,mac type of sets */
+
+static bool
+init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
+ u32 first_ip, u32 last_ip, u32 elements)
+{
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
+ if (!map->members)
+ return false;
+ map->first_ip = first_ip;
+ map->last_ip = last_ip;
+ map->elements = elements;
+ set->timeout = IPSET_NO_TIMEOUT;
+
+ map->set = set;
+ set->data = map;
+ set->family = NFPROTO_IPV4;
+
+ return true;
+}
+
+static int
+bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
+{
+ u32 first_ip = 0, last_ip = 0;
+ u64 elements;
+ struct bitmap_ipmac *map;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
+ if (ret)
+ return ret;
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr >= HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(first_ip, last_ip, cidr);
+ } else {
+ return -IPSET_ERR_PROTOCOL;
+ }
+
+ elements = (u64)last_ip - first_ip + 1;
+
+ if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+ return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem),
+ __alignof__(struct bitmap_ipmac_elem));
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
+ if (!map)
+ return -ENOMEM;
+
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+ set->variant = &bitmap_ipmac;
+ if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
+ ip_set_free(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
+ }
+ return 0;
+}
+
+static struct ip_set_type bitmap_ipmac_type = {
+ .name = "bitmap:ip,mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_IPV4,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = bitmap_ipmac_create,
+ .create_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+bitmap_ipmac_init(void)
+{
+ return ip_set_type_register(&bitmap_ipmac_type);
+}
+
+static void __exit
+bitmap_ipmac_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&bitmap_ipmac_type);
+}
+
+module_init(bitmap_ipmac_init);
+module_exit(bitmap_ipmac_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
new file mode 100644
index 000000000..7138e080d
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the bitmap:port type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_bitmap:port");
+
+#define MTYPE bitmap_port
+
+/* Type structure */
+struct bitmap_port {
+ unsigned long *members; /* the set members */
+ u16 first_port; /* host byte order, included in range */
+ u16 last_port; /* host byte order, included in range */
+	u32 elements;		/* maximum number of elements in the set */
+ size_t memsize; /* members size */
+ struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
+ unsigned char extensions[] /* data extensions */
+ __aligned(__alignof__(u64));
+};
+
+/* ADT structure for generic function args */
+struct bitmap_port_adt_elem {
+ u16 id;
+};
+
+static u16
+port_to_id(const struct bitmap_port *m, u16 port)
+{
+ return port - m->first_port;
+}
+
+/* Common functions */
+
+static int
+bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
+ const struct bitmap_port *map, size_t dsize)
+{
+ return !!test_bit(e->id, map->members);
+}
+
+static int
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
+{
+ return !!test_bit(id, map->members);
+}
+
+static int
+bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
+ struct bitmap_port *map, u32 flags, size_t dsize)
+{
+ return !!test_bit(e->id, map->members);
+}
+
+static int
+bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
+ struct bitmap_port *map)
+{
+ return !test_and_clear_bit(e->id, map->members);
+}
+
+static int
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
+ size_t dsize)
+{
+ return nla_put_net16(skb, IPSET_ATTR_PORT,
+ htons(map->first_port + id));
+}
+
+static int
+bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
+{
+ return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
+}
+
+static bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+ bool ret;
+ u8 proto;
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ ret = ip_set_get_ip4_port(skb, src, port, &proto);
+ break;
+ case NFPROTO_IPV6:
+ ret = ip_set_get_ip6_port(skb, src, port, &proto);
+ break;
+ default:
+ return false;
+ }
+ if (!ret)
+ return ret;
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int
+bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct bitmap_port *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_port_adt_elem e = { .id = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ __be16 __port;
+ u16 port = 0;
+
+ if (!ip_set_get_ip_port(skb, opt->family,
+ opt->flags & IPSET_DIM_ONE_SRC, &__port))
+ return -EINVAL;
+
+ port = ntohs(__port);
+
+ if (port < map->first_port || port > map->last_port)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ e.id = port_to_id(map, port);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct bitmap_port *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_port_adt_elem e = { .id = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 port;	/* u32 to avoid u16 wraparound in the loop below */
+ u16 port_to;
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+
+ port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+ if (port < map->first_port || port > map->last_port)
+ return -IPSET_ERR_BITMAP_RANGE;
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (adt == IPSET_TEST) {
+ e.id = port_to_id(map, port);
+ return adtfn(set, &e, &ext, &ext, flags);
+ }
+
+ if (tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to) {
+ swap(port, port_to);
+ if (port < map->first_port)
+ return -IPSET_ERR_BITMAP_RANGE;
+ }
+ } else {
+ port_to = port;
+ }
+
+ if (port_to > map->last_port)
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ for (; port <= port_to; port++) {
+ e.id = port_to_id(map, port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static bool
+bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+ const struct bitmap_port *x = a->data;
+ const struct bitmap_port *y = b->data;
+
+ return x->first_port == y->first_port &&
+ x->last_port == y->last_port &&
+ a->timeout == b->timeout &&
+ a->extensions == b->extensions;
+}
+
+/* Plain variant */
+
+struct bitmap_port_elem {
+};
+
+#include "ip_set_bitmap_gen.h"
+
+/* Create bitmap:port type of sets */
+
+static bool
+init_map_port(struct ip_set *set, struct bitmap_port *map,
+ u16 first_port, u16 last_port)
+{
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
+ if (!map->members)
+ return false;
+ map->first_port = first_port;
+ map->last_port = last_port;
+ set->timeout = IPSET_NO_TIMEOUT;
+
+ map->set = set;
+ set->data = map;
+ set->family = NFPROTO_UNSPEC;
+
+ return true;
+}
+
+static int
+bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
+{
+ struct bitmap_port *map;
+ u16 first_port, last_port;
+ u32 elements;
+
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+ last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (first_port > last_port)
+ swap(first_port, last_port);
+
+ elements = last_port - first_port + 1;
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
+ if (!map)
+ return -ENOMEM;
+
+ map->elements = elements;
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
+ set->variant = &bitmap_port;
+ if (!init_map_port(set, map, first_port, last_port)) {
+ ip_set_free(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ bitmap_port_gc_init(set, bitmap_port_gc);
+ }
+ return 0;
+}
+
+static struct ip_set_type bitmap_port_type = {
+ .name = "bitmap:port",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_PORT,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = bitmap_port_create,
+ .create_policy = {
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+bitmap_port_init(void)
+{
+ return ip_set_type_register(&bitmap_port_type);
+}
+
+static void __exit
+bitmap_port_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&bitmap_port_type);
+}
+
+module_init(bitmap_port_init);
+module_exit(bitmap_port_fini);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
new file mode 100644
index 000000000..d47dfdcb8
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -0,0 +1,2422 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ * Patrick Schaaf <bof@bof.de>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
+ */
+
+/* Kernel module for IP set management */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/rculist.h>
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+static LIST_HEAD(ip_set_type_list); /* all registered set types */
+static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
+static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
+
+struct ip_set_net {
+ struct ip_set * __rcu *ip_set_list; /* all individual sets */
+ ip_set_id_t ip_set_max; /* max number of sets */
+ bool is_deleted; /* deleted by ip_set_net_exit */
+ bool is_destroyed; /* all sets are destroyed */
+};
+
+static unsigned int ip_set_net_id __read_mostly;
+
+static struct ip_set_net *ip_set_pernet(struct net *net)
+{
+ return net_generic(net, ip_set_net_id);
+}
+
+#define IP_SET_INC 64
+#define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
+
+static unsigned int max_sets;
+
+module_param(max_sets, int, 0600);
+MODULE_PARM_DESC(max_sets, "maximum number of sets");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+MODULE_DESCRIPTION("core IP set support");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
+
+/* When the nfnl mutex or ip_set_ref_lock is held: */
+#define ip_set_dereference(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&ip_set_ref_lock))
+#define ip_set(inst, id) \
+ ip_set_dereference((inst)->ip_set_list)[id]
+#define ip_set_ref_netlink(inst, id)	\
+ rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+
+/* The set types are implemented in modules and registered set types
+ * can be found in ip_set_type_list. Adding/deleting types is
+ * serialized by ip_set_type_mutex.
+ */
+
+static void
+ip_set_type_lock(void)
+{
+ mutex_lock(&ip_set_type_mutex);
+}
+
+static void
+ip_set_type_unlock(void)
+{
+ mutex_unlock(&ip_set_type_mutex);
+}
+
+/* Register and deregister settype */
+
+static struct ip_set_type *
+find_set_type(const char *name, u8 family, u8 revision)
+{
+ struct ip_set_type *type;
+
+ list_for_each_entry_rcu(type, &ip_set_type_list, list,
+ lockdep_is_held(&ip_set_type_mutex))
+ if (STRNCMP(type->name, name) &&
+ (type->family == family ||
+ type->family == NFPROTO_UNSPEC) &&
+ revision >= type->revision_min &&
+ revision <= type->revision_max)
+ return type;
+ return NULL;
+}
+
+/* Unlock, try to load a set type module and lock again */
+static bool
+load_settype(const char *name)
+{
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ pr_debug("try to load ip_set_%s\n", name);
+ if (request_module("ip_set_%s", name) < 0) {
+ pr_warn("Can't find ip_set type %s\n", name);
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ return false;
+ }
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ return true;
+}
+
+/* Find a set type and reference it */
+#define find_set_type_get(name, family, revision, found) \
+ __find_set_type_get(name, family, revision, found, false)
+
+static int
+__find_set_type_get(const char *name, u8 family, u8 revision,
+ struct ip_set_type **found, bool retry)
+{
+ struct ip_set_type *type;
+ int err;
+
+ if (retry && !load_settype(name))
+ return -IPSET_ERR_FIND_TYPE;
+
+ rcu_read_lock();
+ *found = find_set_type(name, family, revision);
+ if (*found) {
+ err = !try_module_get((*found)->me) ? -EFAULT : 0;
+ goto unlock;
+ }
+	/* The type is already loaded but the requested
+	 * revision is not supported
+	 */
+ list_for_each_entry_rcu(type, &ip_set_type_list, list)
+ if (STRNCMP(type->name, name)) {
+ err = -IPSET_ERR_FIND_TYPE;
+ goto unlock;
+ }
+ rcu_read_unlock();
+
+ return retry ? -IPSET_ERR_FIND_TYPE :
+ __find_set_type_get(name, family, revision, found, true);
+
+unlock:
+ rcu_read_unlock();
+ return err;
+}
+
+/* Find a given set type by name and family.
+ * On success, the supported minimum and maximum revisions are
+ * filled out.
+ */
+#define find_set_type_minmax(name, family, min, max) \
+ __find_set_type_minmax(name, family, min, max, false)
+
+static int
+__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
+ bool retry)
+{
+ struct ip_set_type *type;
+ bool found = false;
+
+ if (retry && !load_settype(name))
+ return -IPSET_ERR_FIND_TYPE;
+
+	*min = 255;
+	*max = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(type, &ip_set_type_list, list)
+ if (STRNCMP(type->name, name) &&
+ (type->family == family ||
+ type->family == NFPROTO_UNSPEC)) {
+ found = true;
+ if (type->revision_min < *min)
+ *min = type->revision_min;
+ if (type->revision_max > *max)
+ *max = type->revision_max;
+ }
+ rcu_read_unlock();
+ if (found)
+ return 0;
+
+ return retry ? -IPSET_ERR_FIND_TYPE :
+ __find_set_type_minmax(name, family, min, max, true);
+}
+
+#define family_name(f) ((f) == NFPROTO_IPV4 ? "inet" : \
+ (f) == NFPROTO_IPV6 ? "inet6" : "any")
+
+/* Register a set type structure. The type is identified by
+ * the unique triple of name, family and revision.
+ */
+int
+ip_set_type_register(struct ip_set_type *type)
+{
+ int ret = 0;
+
+ if (type->protocol != IPSET_PROTOCOL) {
+ pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
+ type->name, family_name(type->family),
+ type->revision_min, type->revision_max,
+ type->protocol, IPSET_PROTOCOL);
+ return -EINVAL;
+ }
+
+ ip_set_type_lock();
+ if (find_set_type(type->name, type->family, type->revision_min)) {
+ /* Duplicate! */
+ pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
+ type->name, family_name(type->family),
+ type->revision_min);
+ ip_set_type_unlock();
+ return -EINVAL;
+ }
+ list_add_rcu(&type->list, &ip_set_type_list);
+ pr_debug("type %s, family %s, revision %u:%u registered.\n",
+ type->name, family_name(type->family),
+ type->revision_min, type->revision_max);
+ ip_set_type_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_type_register);
+
+/* Unregister a set type. There's a small race with ip_set_create */
+void
+ip_set_type_unregister(struct ip_set_type *type)
+{
+ ip_set_type_lock();
+ if (!find_set_type(type->name, type->family, type->revision_min)) {
+ pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
+ type->name, family_name(type->family),
+ type->revision_min);
+ ip_set_type_unlock();
+ return;
+ }
+ list_del_rcu(&type->list);
+ pr_debug("type %s, family %s with revision min %u unregistered.\n",
+ type->name, family_name(type->family), type->revision_min);
+ ip_set_type_unlock();
+
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(ip_set_type_unregister);
+
+/* Utility functions */
+void *
+ip_set_alloc(size_t size)
+{
+ return kvzalloc(size, GFP_KERNEL_ACCOUNT);
+}
+EXPORT_SYMBOL_GPL(ip_set_alloc);
+
+void
+ip_set_free(void *members)
+{
+ pr_debug("%p: free with %s\n", members,
+ is_vmalloc_addr(members) ? "vfree" : "kfree");
+ kvfree(members);
+}
+EXPORT_SYMBOL_GPL(ip_set_free);
+
+static bool
+flag_nested(const struct nlattr *nla)
+{
+ return nla->nla_type & NLA_F_NESTED;
+}
+
+static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
+ [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
+ [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+};
+
+int
+ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
+{
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
+
+ if (unlikely(!flag_nested(nla)))
+ return -IPSET_ERR_PROTOCOL;
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
+ return -IPSET_ERR_PROTOCOL;
+
+ *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
+
+int
+ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
+{
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
+
+ if (unlikely(!flag_nested(nla)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
+ return -IPSET_ERR_PROTOCOL;
+
+ memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
+ sizeof(struct in6_addr));
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+
+static u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+ u32 t;
+
+ if (*timeout == IPSET_ELEM_PERMANENT)
+ return 0;
+
+ t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
+ /* Zero value in userspace means no timeout */
+ return t == 0 ? 1 : t;
+}
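+
+/* The value reported to userspace is whole seconds remaining, e.g.
+ * 2500 ms left reads back as 2, and anything under a second is
+ * rounded up to 1 because 0 would mean "no timeout".
+ */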
+
+static char *
+ip_set_comment_uget(struct nlattr *tb)
+{
+ return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+ const struct ip_set_ext *ext)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+ size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+ if (unlikely(c)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+ }
+ if (!len)
+ return;
+ if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+ len = IPSET_MAX_COMMENT_SIZE;
+ c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
+ return;
+ strscpy(c->str, ext->comment, len + 1);
+ set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+ rcu_assign_pointer(comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_init_comment);
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+ if (!c)
+ return 0;
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors, protected
+ * by the set spinlock.
+ * Also called when the set is destroyed, when there can't be any
+ * user of the set data anymore.
+ */
+static void
+ip_set_comment_free(struct ip_set *set, void *ptr)
+{
+ struct ip_set_comment *comment = ptr;
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
+ return;
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+}
+
+typedef void (*destroyer)(struct ip_set *, void *);
+/* ipset data extension types, in size order */
+
+const struct ip_set_ext_type ip_set_extensions[] = {
+ [IPSET_EXT_ID_COUNTER] = {
+ .type = IPSET_EXT_COUNTER,
+ .flag = IPSET_FLAG_WITH_COUNTERS,
+ .len = sizeof(struct ip_set_counter),
+ .align = __alignof__(struct ip_set_counter),
+ },
+ [IPSET_EXT_ID_TIMEOUT] = {
+ .type = IPSET_EXT_TIMEOUT,
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [IPSET_EXT_ID_SKBINFO] = {
+ .type = IPSET_EXT_SKBINFO,
+ .flag = IPSET_FLAG_WITH_SKBINFO,
+ .len = sizeof(struct ip_set_skbinfo),
+ .align = __alignof__(struct ip_set_skbinfo),
+ },
+ [IPSET_EXT_ID_COMMENT] = {
+ .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
+ .flag = IPSET_FLAG_WITH_COMMENT,
+ .len = sizeof(struct ip_set_comment),
+ .align = __alignof__(struct ip_set_comment),
+ .destroy = ip_set_comment_free,
+ },
+};
+EXPORT_SYMBOL_GPL(ip_set_extensions);
+
+static bool
+add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
+{
+ return ip_set_extensions[id].flag ?
+ (flags & ip_set_extensions[id].flag) :
+ !!tb[IPSET_ATTR_TIMEOUT];
+}
+
+size_t
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+ size_t align)
+{
+ enum ip_set_ext_id id;
+ u32 cadt_flags = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
+ set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+ if (!align)
+ align = 1;
+ for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
+ if (!add_extension(id, cadt_flags, tb))
+ continue;
+ if (align < ip_set_extensions[id].align)
+ align = ip_set_extensions[id].align;
+ len = ALIGN(len, ip_set_extensions[id].align);
+ set->offset[id] = len;
+ set->extensions |= ip_set_extensions[id].type;
+ len += ip_set_extensions[id].len;
+ }
+ return ALIGN(len, align);
+}
+EXPORT_SYMBOL_GPL(ip_set_elem_len);
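+
+/* Layout sketch (illustrative, 64-bit kernel): for a 4-byte element
+ * created with counters and a timeout, the loop above yields
+ * offset[IPSET_EXT_ID_COUNTER] = 8 (16-byte struct ip_set_counter),
+ * offset[IPSET_EXT_ID_TIMEOUT] = 24 (8-byte unsigned long), and the
+ * returned element size is ALIGN(32, 8) = 32 bytes.
+ */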
+
+int
+ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
+ struct ip_set_ext *ext)
+{
+ u64 fullmark;
+
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ if (!SET_WITH_TIMEOUT(set))
+ return -IPSET_ERR_TIMEOUT;
+ ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ }
+ if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
+ if (!SET_WITH_COUNTER(set))
+ return -IPSET_ERR_COUNTER;
+ if (tb[IPSET_ATTR_BYTES])
+ ext->bytes = be64_to_cpu(nla_get_be64(
+ tb[IPSET_ATTR_BYTES]));
+ if (tb[IPSET_ATTR_PACKETS])
+ ext->packets = be64_to_cpu(nla_get_be64(
+ tb[IPSET_ATTR_PACKETS]));
+ }
+ if (tb[IPSET_ATTR_COMMENT]) {
+ if (!SET_WITH_COMMENT(set))
+ return -IPSET_ERR_COMMENT;
+ ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
+ }
+ if (tb[IPSET_ATTR_SKBMARK]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+ ext->skbinfo.skbmark = fullmark >> 32;
+ ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
+ }
+ if (tb[IPSET_ATTR_SKBPRIO]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbinfo.skbprio =
+ be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
+ }
+ if (tb[IPSET_ATTR_SKBQUEUE]) {
+ if (!SET_WITH_SKBINFO(set))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbinfo.skbqueue =
+ be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_extensions);
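+
+/* The skbmark attribute packs mark and mask into one 64-bit word:
+ * the mark in the high 32 bits, the mask in the low 32 bits. For
+ * example (illustrative), 0x0000002AFFFFFFFF decodes above to
+ * skbinfo.skbmark = 0x2A and skbinfo.skbmarkmask = 0xFFFFFFFF.
+ */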
+
+static u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->packets);
+}
+
+static bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+ return nla_put_net64(skb, IPSET_ATTR_BYTES,
+ cpu_to_be64(ip_set_get_bytes(counter)),
+ IPSET_ATTR_PAD) ||
+ nla_put_net64(skb, IPSET_ATTR_PACKETS,
+ cpu_to_be64(ip_set_get_packets(counter)),
+ IPSET_ATTR_PAD);
+}
+
+static bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+ /* Send nonzero parameters only */
+ return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+ cpu_to_be64((u64)skbinfo->skbmark << 32 |
+ skbinfo->skbmarkmask),
+ IPSET_ATTR_PAD)) ||
+ (skbinfo->skbprio &&
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ cpu_to_be32(skbinfo->skbprio))) ||
+ (skbinfo->skbqueue &&
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ cpu_to_be16(skbinfo->skbqueue)));
+}
+
+int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+ const void *e, bool active)
+{
+ if (SET_WITH_TIMEOUT(set)) {
+ unsigned long *timeout = ext_timeout(e, set);
+
+ if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(active ? ip_set_timeout_get(timeout)
+ : *timeout)))
+ return -EMSGSIZE;
+ }
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_COMMENT(set) &&
+ ip_set_put_comment(skb, ext_comment(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_SKBINFO(set) &&
+ ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+ return -EMSGSIZE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+
+static bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+ switch (op) {
+ case IPSET_COUNTER_NONE:
+ return true;
+ case IPSET_COUNTER_EQ:
+ return counter == match;
+ case IPSET_COUNTER_NE:
+ return counter != match;
+ case IPSET_COUNTER_LT:
+ return counter < match;
+ case IPSET_COUNTER_GT:
+ return counter > match;
+ }
+ return false;
+}
+
+static void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static void
+ip_set_update_counter(struct ip_set_counter *counter,
+ const struct ip_set_ext *ext, u32 flags)
+{
+ if (ext->packets != ULLONG_MAX &&
+ !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+ ip_set_add_bytes(ext->bytes, counter);
+ ip_set_add_packets(ext->packets, counter);
+ }
+}
+
+static void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ mext->skbinfo = *skbinfo;
+}
+
+bool
+ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags, void *data)
+{
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(data, set)))
+ return false;
+ if (SET_WITH_COUNTER(set)) {
+ struct ip_set_counter *counter = ext_counter(data, set);
+
+ ip_set_update_counter(counter, ext, flags);
+
+ if (flags & IPSET_FLAG_MATCH_COUNTERS &&
+ !(ip_set_match_counter(ip_set_get_packets(counter),
+ mext->packets, mext->packets_op) &&
+ ip_set_match_counter(ip_set_get_bytes(counter),
+ mext->bytes, mext->bytes_op)))
+ return false;
+ }
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_set_match_extensions);
+
+/* Creating/destroying/renaming/swapping affect the existence and
+ * the properties of a set. All of these can be executed from userspace
+ * only and serialized by the nfnl mutex indirectly from nfnetlink.
+ *
+ * Sets are identified by their index in ip_set_list and the index
+ * is used by the external references (set/SET netfilter modules).
+ *
+ * The set behind an index may change by swapping only, from userspace.
+ */
+
+static void
+__ip_set_get(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
+__ip_set_put(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ BUG_ON(set->ref == 0);
+ set->ref--;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+/* set->ref can be swapped out by ip_set_swap, so netlink events
+ * (like dump) need a separate reference counter
+ */
+static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
+__ip_set_put_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ BUG_ON(set->ref_netlink == 0);
+ set->ref_netlink--;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+/* Add, del and test set entries from kernel.
+ *
+ * The set behind the index must exist and must be referenced
+ * so it can't be destroyed (or changed) under our feet.
+ */
+
+static struct ip_set *
+ip_set_rcu_get(struct net *net, ip_set_id_t index)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
+}
+
+static inline void
+ip_set_lock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_lock_bh(&set->lock);
+}
+
+static inline void
+ip_set_unlock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_unlock_bh(&set->lock);
+}
+
+int
+ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
+{
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
+ int ret = 0;
+
+ BUG_ON(!set);
+ pr_debug("set %s, index %u\n", set->name, index);
+
+ if (opt->dim < set->type->dimension ||
+ !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
+ return 0;
+
+ rcu_read_lock_bh();
+ ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
+ rcu_read_unlock_bh();
+
+ if (ret == -EAGAIN) {
+ /* Type requests element to be completed */
+ pr_debug("element must be completed, ADD is triggered\n");
+ ip_set_lock(set);
+ set->variant->kadt(set, skb, par, IPSET_ADD, opt);
+ ip_set_unlock(set);
+ ret = 1;
+ } else {
+ /* --return-nomatch: invert matched element */
+ if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
+ (set->type->features & IPSET_TYPE_NOMATCH) &&
+ (ret > 0 || ret == -ENOTEMPTY))
+ ret = -ret;
+ }
+
+ /* Convert error codes to nomatch */
+ return (ret < 0 ? 0 : ret);
+}
+EXPORT_SYMBOL_GPL(ip_set_test);
+
+int
+ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
+{
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
+ int ret;
+
+ BUG_ON(!set);
+ pr_debug("set %s, index %u\n", set->name, index);
+
+ if (opt->dim < set->type->dimension ||
+ !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
+ return -IPSET_ERR_TYPE_MISMATCH;
+
+ ip_set_lock(set);
+ ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
+ ip_set_unlock(set);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_add);
+
+int
+ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
+{
+ struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
+ int ret = 0;
+
+ BUG_ON(!set);
+ pr_debug("set %s, index %u\n", set->name, index);
+
+ if (opt->dim < set->type->dimension ||
+ !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
+ return -IPSET_ERR_TYPE_MISMATCH;
+
+ ip_set_lock(set);
+ ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
+ ip_set_unlock(set);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_del);
+
+/* Find set by name, reference it once. The reference makes sure the
+ * thing pointed to does not go away under our feet.
+ */
+ip_set_id_t
+ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
+{
+ ip_set_id_t i, index = IPSET_INVALID_ID;
+ struct ip_set *s;
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ rcu_read_lock();
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = rcu_dereference(inst->ip_set_list)[i];
+ if (s && STRNCMP(s->name, name)) {
+ __ip_set_get(s);
+ index = i;
+ *set = s;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return index;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_byname);
+
+/* If the given set pointer points to a valid set, decrement
+ * the reference count by 1. The caller shall not assume the index
+ * to be valid after calling this function.
+ */
+
+static void
+__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
+{
+ struct ip_set *set;
+
+ rcu_read_lock();
+ set = rcu_dereference(inst->ip_set_list)[index];
+ if (set)
+ __ip_set_put(set);
+ rcu_read_unlock();
+}
+
+void
+ip_set_put_byindex(struct net *net, ip_set_id_t index)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ __ip_set_put_byindex(inst, index);
+}
+EXPORT_SYMBOL_GPL(ip_set_put_byindex);
+
+/* Get the name of a set behind a set index.
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
+ */
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
+{
+ struct ip_set *set = ip_set_rcu_get(net, index);
+
+ BUG_ON(!set);
+
+ read_lock_bh(&ip_set_ref_lock);
+ strncpy(name, set->name, IPSET_MAXNAMELEN);
+ read_unlock_bh(&ip_set_ref_lock);
+}
+EXPORT_SYMBOL_GPL(ip_set_name_byindex);
+
+/* Routines to call by external subsystems, which do not
+ * call nfnl_lock for us.
+ */
+
+/* Find set by index, reference it once. The reference makes sure the
+ * thing pointed to does not go away under our feet.
+ *
+ * The nfnl mutex is used in the function.
+ */
+ip_set_id_t
+ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
+{
+ struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ if (index >= inst->ip_set_max)
+ return IPSET_INVALID_ID;
+
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ set = ip_set(inst, index);
+ if (set)
+ __ip_set_get(set);
+ else
+ index = IPSET_INVALID_ID;
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+
+ return index;
+}
+EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
+
+/* If the given set pointer points to a valid set, decrement
+ * the reference count by 1. The caller shall not assume the index
+ * to be valid after calling this function.
+ *
+ * The nfnl mutex is used in the function.
+ */
+void
+ip_set_nfnl_put(struct net *net, ip_set_id_t index)
+{
+ struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
+ set = ip_set(inst, index);
+ if (set)
+ __ip_set_put(set);
+ }
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+}
+EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
+
+/* Communication protocol with userspace over netlink.
+ *
+ * The commands are serialized by the nfnl mutex.
+ */
+
+static inline u8 protocol(const struct nlattr * const tb[])
+{
+ return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
+}
+
+static inline bool
+protocol_failed(const struct nlattr * const tb[])
+{
+ return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
+}
+
+static inline bool
+protocol_min_failed(const struct nlattr * const tb[])
+{
+ return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
+}
+
+static inline u32
+flag_exist(const struct nlmsghdr *nlh)
+{
+ return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
+}
+
+static struct nlmsghdr *
+start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
+ enum ipset_cmd cmd)
+{
+ return nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
+ NFPROTO_IPV4, NFNETLINK_V0, 0);
+}
+
+/* Create a set */
+
+static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
+					    .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
+ [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
+ [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
+};
+
+static struct ip_set *
+find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
+{
+ struct ip_set *set = NULL;
+ ip_set_id_t i;
+
+ *id = IPSET_INVALID_ID;
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set && STRNCMP(set->name, name)) {
+ *id = i;
+ break;
+ }
+ }
+ return (*id == IPSET_INVALID_ID ? NULL : set);
+}
+
+static inline struct ip_set *
+find_set(struct ip_set_net *inst, const char *name)
+{
+ ip_set_id_t id;
+
+ return find_set_and_id(inst, name, &id);
+}
+
+static int
+find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
+ struct ip_set **set)
+{
+ struct ip_set *s;
+ ip_set_id_t i;
+
+ *index = IPSET_INVALID_ID;
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (!s) {
+ if (*index == IPSET_INVALID_ID)
+ *index = i;
+ } else if (STRNCMP(name, s->name)) {
+ /* Name clash */
+ *set = s;
+ return -EEXIST;
+ }
+ }
+ if (*index == IPSET_INVALID_ID)
+ /* No free slot remained */
+ return -IPSET_ERR_MAX_SETS;
+ return 0;
+}
+
+static int ip_set_none(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ return -EOPNOTSUPP;
+}
+
+static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *set, *clash = NULL;
+ ip_set_id_t index = IPSET_INVALID_ID;
+ struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
+ const char *name, *typename;
+ u8 family, revision;
+ u32 flags = flag_exist(info->nlh);
+ int ret = 0;
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_REVISION] ||
+ !attr[IPSET_ATTR_FAMILY] ||
+ (attr[IPSET_ATTR_DATA] &&
+ !flag_nested(attr[IPSET_ATTR_DATA]))))
+ return -IPSET_ERR_PROTOCOL;
+
+ name = nla_data(attr[IPSET_ATTR_SETNAME]);
+ typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+ family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+ revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
+ pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
+ name, typename, family_name(family), revision);
+
+ /* First, and without any locks, allocate and initialize
+ * a normal base set structure.
+ */
+ set = kzalloc(sizeof(*set), GFP_KERNEL);
+ if (!set)
+ return -ENOMEM;
+ spin_lock_init(&set->lock);
+ strscpy(set->name, name, IPSET_MAXNAMELEN);
+ set->family = family;
+ set->revision = revision;
+
+ /* Next, check that we know the type, and take
+ * a reference on the type, to make sure it stays available
+ * while constructing our new set.
+ *
+ * After referencing the type, we try to create the type
+ * specific part of the set without holding any locks.
+ */
+ ret = find_set_type_get(typename, family, revision, &set->type);
+ if (ret)
+ goto out;
+
+ /* Without holding any locks, create private part. */
+ if (attr[IPSET_ATTR_DATA] &&
+ nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
+ set->type->create_policy, NULL)) {
+ ret = -IPSET_ERR_PROTOCOL;
+ goto put_out;
+ }
+ /* Set create flags depending on the type revision */
+ set->flags |= set->type->create_flags[revision];
+
+ ret = set->type->create(info->net, set, tb, flags);
+ if (ret != 0)
+ goto put_out;
+
+ /* BTW, ret==0 here. */
+
+ /* Here, we have a valid, constructed set and we are protected
+ * by the nfnl mutex. Find the first free index in ip_set_list
+ * and check clashing.
+ */
+ ret = find_free_id(inst, set->name, &index, &clash);
+ if (ret == -EEXIST) {
+ /* If this is the same set and requested, ignore error */
+ if ((flags & IPSET_FLAG_EXIST) &&
+ STRNCMP(set->type->name, clash->type->name) &&
+ set->type->family == clash->type->family &&
+ set->type->revision_min == clash->type->revision_min &&
+ set->type->revision_max == clash->type->revision_max &&
+ set->variant->same_set(set, clash))
+ ret = 0;
+ goto cleanup;
+ } else if (ret == -IPSET_ERR_MAX_SETS) {
+ struct ip_set **list, **tmp;
+ ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
+
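+ /* The array of sets is full: grow it by IP_SET_INC slots, copy
+ * the old pointers over and publish the new array via RCU before
+ * freeing the old one.
+ */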
+ if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
+ /* Wraparound */
+ goto cleanup;
+
+ list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
+ if (!list)
+ goto cleanup;
+ /* nfnl mutex is held, both lists are valid */
+ tmp = ip_set_dereference(inst->ip_set_list);
+ memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
+ rcu_assign_pointer(inst->ip_set_list, list);
+ /* Make sure all current packets have passed through */
+ synchronize_net();
+ /* Use new list */
+ index = inst->ip_set_max;
+ inst->ip_set_max = i;
+ kvfree(tmp);
+ ret = 0;
+ } else if (ret) {
+ goto cleanup;
+ }
+
+ /* Finally! Add our shiny new set to the list, and be done. */
+ pr_debug("create: '%s' created with index %u!\n", set->name, index);
+ ip_set(inst, index) = set;
+
+ return ret;
+
+cleanup:
+ set->variant->destroy(set);
+put_out:
+ module_put(set->type->me);
+out:
+ kfree(set);
+ return ret;
+}
+
+/* Destroy sets */
+
+static const struct nla_policy
+ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+};
+
+static void
+ip_set_destroy_set(struct ip_set *set)
+{
+ pr_debug("set: %s\n", set->name);
+
+ /* Must call it without holding any lock */
+ set->variant->destroy(set);
+ module_put(set->type->me);
+ kfree(set);
+}
+
+static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *s;
+ ip_set_id_t i;
+ int ret = 0;
+
+ if (unlikely(protocol_min_failed(attr)))
+ return -IPSET_ERR_PROTOCOL;
+
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+
+ /* Commands are serialized and references are
+ * protected by the ip_set_ref_lock.
+ * External systems (i.e. xt_set) must call
+ * ip_set_put|get_nfnl_* functions, that way we
+ * can safely check references here.
+ *
+ * list:set timer can only decrement the reference
+ * counter, so if it's already zero, we can proceed
+ * without holding the lock.
+ */
+ read_lock_bh(&ip_set_ref_lock);
+ if (!attr[IPSET_ATTR_SETNAME]) {
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (s && (s->ref || s->ref_netlink)) {
+ ret = -IPSET_ERR_BUSY;
+ goto out;
+ }
+ }
+ inst->is_destroyed = true;
+ read_unlock_bh(&ip_set_ref_lock);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (s) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(s);
+ }
+ }
+ /* Modified by ip_set_destroy() only, which is serialized */
+ inst->is_destroyed = false;
+ } else {
+ u32 flags = flag_exist(info->nlh);
+ s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+ &i);
+ if (!s) {
+ if (!(flags & IPSET_FLAG_EXIST))
+ ret = -ENOENT;
+ goto out;
+ } else if (s->ref || s->ref_netlink) {
+ ret = -IPSET_ERR_BUSY;
+ goto out;
+ }
+ ip_set(inst, i) = NULL;
+ read_unlock_bh(&ip_set_ref_lock);
+
+ ip_set_destroy_set(s);
+ }
+ return 0;
+out:
+ read_unlock_bh(&ip_set_ref_lock);
+ return ret;
+}
+
+/* Flush sets */
+
+static void
+ip_set_flush_set(struct ip_set *set)
+{
+ pr_debug("set: %s\n", set->name);
+
+ ip_set_lock(set);
+ set->variant->flush(set);
+ ip_set_unlock(set);
+}
+
+static int ip_set_flush(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *s;
+ ip_set_id_t i;
+
+ if (unlikely(protocol_min_failed(attr)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (!attr[IPSET_ATTR_SETNAME]) {
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (s)
+ ip_set_flush_set(s);
+ }
+ } else {
+ s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+ if (!s)
+ return -ENOENT;
+
+ ip_set_flush_set(s);
+ }
+
+ return 0;
+}
+
+/* Rename a set */
+
+static const struct nla_policy
+ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+};
+
+static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *set, *s;
+ const char *name2;
+ ip_set_id_t i;
+ int ret = 0;
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+ if (!set)
+ return -ENOENT;
+
+ write_lock_bh(&ip_set_ref_lock);
+ if (set->ref != 0 || set->ref_netlink != 0) {
+ ret = -IPSET_ERR_REFERENCED;
+ goto out;
+ }
+
+ name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = ip_set(inst, i);
+ if (s && STRNCMP(s->name, name2)) {
+ ret = -IPSET_ERR_EXIST_SETNAME2;
+ goto out;
+ }
+ }
+ strncpy(set->name, name2, IPSET_MAXNAMELEN);
+
+out:
+ write_unlock_bh(&ip_set_ref_lock);
+ return ret;
+}
+
+/* Swap two sets so that name/index points to the other.
+ * References and set names are also swapped.
+ *
+ * The commands are serialized by the nfnl mutex and references are
+ * protected by the ip_set_ref_lock. The kernel interfaces
+ * do not hold the mutex but the pointer settings are atomic
+ * so the ip_set_list always contains valid pointers to the sets.
+ */
+
+static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *from, *to;
+ ip_set_id_t from_id, to_id;
+ char from_name[IPSET_MAXNAMELEN];
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
+ return -IPSET_ERR_PROTOCOL;
+
+ from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+ &from_id);
+ if (!from)
+ return -ENOENT;
+
+ to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
+ &to_id);
+ if (!to)
+ return -IPSET_ERR_EXIST_SETNAME2;
+
+ /* Features must not change.
+ * Not an artificial restriction anymore, as we must prevent
+ * possible loops created by swapping in list:set type of sets.
+ */
+ if (!(from->type->features == to->type->features &&
+ from->family == to->family))
+ return -IPSET_ERR_TYPE_MISMATCH;
+
+ write_lock_bh(&ip_set_ref_lock);
+
+ if (from->ref_netlink || to->ref_netlink) {
+ write_unlock_bh(&ip_set_ref_lock);
+ return -EBUSY;
+ }
+
+ strncpy(from_name, from->name, IPSET_MAXNAMELEN);
+ strncpy(from->name, to->name, IPSET_MAXNAMELEN);
+ strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+
+ swap(from->ref, to->ref);
+ ip_set(inst, from_id) = to;
+ ip_set(inst, to_id) = from;
+ write_unlock_bh(&ip_set_ref_lock);
+
+ /* Make sure all readers of the old set pointers are completed. */
+ synchronize_rcu();
+
+ return 0;
+}
+
+/* List/save set data */
+
+#define DUMP_INIT 0
+#define DUMP_ALL 1
+#define DUMP_ONE 2
+#define DUMP_LAST 3
+
+#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF)
+#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16)
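+
+/* cb->args[IPSET_CB_DUMP] packs the dump type into the low 16 bits and
+ * the IPSET_FLAG_LIST_* flags into the high 16 bits, see
+ * ip_set_dump_start() and the DUMP_TYPE()/DUMP_FLAGS() macros above.
+ */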
+
+int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+ u32 cadt_flags = 0;
+
+ if (SET_WITH_TIMEOUT(set))
+ if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(set->timeout))))
+ return -EMSGSIZE;
+ if (SET_WITH_COUNTER(set))
+ cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+ if (SET_WITH_COMMENT(set))
+ cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
+
+ if (!cadt_flags)
+ return 0;
+ return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+EXPORT_SYMBOL_GPL(ip_set_put_flags);
+
+static int
+ip_set_dump_done(struct netlink_callback *cb)
+{
+ if (cb->args[IPSET_CB_ARG0]) {
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set_ref_netlink(inst, index);
+
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_netlink(set);
+ }
+ return 0;
+}
+
+static inline void
+dump_attrs(struct nlmsghdr *nlh)
+{
+ const struct nlattr *attr;
+ int rem;
+
+ pr_debug("dump nlmsg\n");
+ nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
+ pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
+ }
+}
+
+static const struct nla_policy
+ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_FLAGS] = { .type = NLA_U32 },
+};
+
+static int
+ip_set_dump_start(struct netlink_callback *cb)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ struct sk_buff *skb = cb->skb;
+ struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
+ u32 dump_type;
+ int ret;
+
+ ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
+ nlh->nlmsg_len - min_len,
+ ip_set_dump_policy, NULL);
+ if (ret)
+ goto error;
+
+ cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
+ if (cda[IPSET_ATTR_SETNAME]) {
+ ip_set_id_t index;
+ struct ip_set *set;
+
+ set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
+ &index);
+ if (!set) {
+ ret = -ENOENT;
+ goto error;
+ }
+ dump_type = DUMP_ONE;
+ cb->args[IPSET_CB_INDEX] = index;
+ } else {
+ dump_type = DUMP_ALL;
+ }
+
+ if (cda[IPSET_ATTR_FLAGS]) {
+ u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
+
+ dump_type |= (f << 16);
+ }
+ cb->args[IPSET_CB_NET] = (unsigned long)inst;
+ cb->args[IPSET_CB_DUMP] = dump_type;
+
+ return 0;
+
+error:
+ /* We have to create and send the error message manually :-( */
+ if (nlh->nlmsg_flags & NLM_F_ACK) {
+ netlink_ack(cb->skb, nlh, ret, NULL);
+ }
+ return ret;
+}
+
+static int
+ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ ip_set_id_t index = IPSET_INVALID_ID, max;
+ struct ip_set *set = NULL;
+ struct nlmsghdr *nlh = NULL;
+ unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
+ struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
+ u32 dump_type, dump_flags;
+ bool is_destroyed;
+ int ret = 0;
+
+ if (!cb->args[IPSET_CB_DUMP])
+ return -EINVAL;
+
+ if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
+ goto out;
+
+ dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
+ dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
+ max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
+ : inst->ip_set_max;
+dump_last:
+ pr_debug("dump type, flag: %u %u index: %ld\n",
+ dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
+ for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
+ index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ write_lock_bh(&ip_set_ref_lock);
+ set = ip_set(inst, index);
+ is_destroyed = inst->is_destroyed;
+ if (!set || is_destroyed) {
+ write_unlock_bh(&ip_set_ref_lock);
+ if (dump_type == DUMP_ONE) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (is_destroyed) {
+ /* All sets are just being destroyed */
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+ /* When dumping all sets, we must dump "sorted"
+ * so that lists (unions of sets) are dumped last.
+ */
+ if (dump_type != DUMP_ONE &&
+ ((dump_type == DUMP_ALL) ==
+ !!(set->type->features & IPSET_DUMP_LAST))) {
+ write_unlock_bh(&ip_set_ref_lock);
+ continue;
+ }
+ pr_debug("List set: %s\n", set->name);
+ if (!cb->args[IPSET_CB_ARG0]) {
+ /* Start listing: make sure set won't be destroyed */
+ pr_debug("reference set\n");
+ set->ref_netlink++;
+ }
+ write_unlock_bh(&ip_set_ref_lock);
+ nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ IPSET_CMD_LIST);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto release_refcount;
+ }
+ if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
+ cb->args[IPSET_CB_PROTO]) ||
+ nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
+ goto nla_put_failure;
+ if (dump_flags & IPSET_FLAG_LIST_SETNAME)
+ goto next_set;
+ switch (cb->args[IPSET_CB_ARG0]) {
+ case 0:
+ /* Core header data */
+ if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
+ set->type->name) ||
+ nla_put_u8(skb, IPSET_ATTR_FAMILY,
+ set->family) ||
+ nla_put_u8(skb, IPSET_ATTR_REVISION,
+ set->revision))
+ goto nla_put_failure;
+ if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
+ nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
+ goto nla_put_failure;
+ ret = set->variant->head(set, skb);
+ if (ret < 0)
+ goto release_refcount;
+ if (dump_flags & IPSET_FLAG_LIST_HEADER)
+ goto next_set;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
+ fallthrough;
+ default:
+ ret = set->variant->list(set, skb, cb);
+ if (!cb->args[IPSET_CB_ARG0])
+ /* Set is done, proceed with next one */
+ goto next_set;
+ goto release_refcount;
+ }
+ }
+ /* If we dump all sets, continue with dumping last ones */
+ if (dump_type == DUMP_ALL) {
+ dump_type = DUMP_LAST;
+ cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
+ cb->args[IPSET_CB_INDEX] = 0;
+ if (set && set->variant->uref)
+ set->variant->uref(set, cb, false);
+ goto dump_last;
+ }
+ goto out;
+
+nla_put_failure:
+ ret = -EFAULT;
+next_set:
+ if (dump_type == DUMP_ONE)
+ cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
+ else
+ cb->args[IPSET_CB_INDEX]++;
+release_refcount:
+ /* If there was an error or set is done, release set */
+ if (ret || !cb->args[IPSET_CB_ARG0]) {
+ set = ip_set_ref_netlink(inst, index);
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_netlink(set);
+ cb->args[IPSET_CB_ARG0] = 0;
+ }
+out:
+ if (nlh) {
+ nlmsg_end(skb, nlh);
+ pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
+ dump_attrs(nlh);
+ }
+
+ return ret < 0 ? ret : skb->len;
+}
+
+static int ip_set_dump(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ if (unlikely(protocol_min_failed(attr)))
+ return -IPSET_ERR_PROTOCOL;
+
+ {
+ struct netlink_dump_control c = {
+ .start = ip_set_dump_start,
+ .dump = ip_set_dump_do,
+ .done = ip_set_dump_done,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+}
+
+/* Add, del and test */
+
+static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
+ [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
+};
+
+static int
+call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt,
+ u32 flags, bool use_lineno)
+{
+ int ret;
+ u32 lineno = 0;
+ bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
+
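+ /* Retry until the add/del goes through: -ERANGE asks for a plain
+ * retry, -EAGAIN for a resize of the hash first. While retrying,
+ * the nfnl mutex is dropped briefly (with a netlink reference
+ * held) so other commands are not starved.
+ */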
+ do {
+ if (retried) {
+ __ip_set_get_netlink(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put_netlink(set);
+ }
+
+ ip_set_lock(set);
+ ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
+ ip_set_unlock(set);
+ retried = true;
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
+
+ if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+ return 0;
+ if (lineno && use_lineno) {
+ /* Error in restore/batch mode: send back lineno */
+ struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
+ struct sk_buff *skb2;
+ struct nlmsgerr *errmsg;
+ size_t payload = min(SIZE_MAX,
+ sizeof(*errmsg) + nlmsg_len(nlh));
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
+ struct nlattr *cmdattr;
+ u32 *errline;
+
+ skb2 = nlmsg_new(payload, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+ rep = nlmsg_put(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+ errmsg = nlmsg_data(rep);
+ errmsg->error = ret;
+ unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len,
+ /* Bounds checked by the skb layer. */);
+
+ cmdattr = (void *)&errmsg->msg + min_len;
+
+ ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+ nlh->nlmsg_len - min_len, ip_set_adt_policy,
+ NULL);
+
+ if (ret) {
+ nlmsg_free(skb2);
+ return ret;
+ }
+ errline = nla_data(cda[IPSET_ATTR_LINENO]);
+
+ *errline = lineno;
+
+ nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ /* Signal netlink not to send its ACK/errmsg. */
+ return -EINTR;
+ }
+
+ return ret;
+}
+
+static int ip_set_ad(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ enum ipset_adt adt,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set *set;
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
+ const struct nlattr *nla;
+ u32 flags = flag_exist(nlh);
+ bool use_lineno;
+ int ret = 0;
+
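+ /* Exactly one of IPSET_ATTR_DATA (a single element) and
+ * IPSET_ATTR_ADT (a batch of elements, which requires LINENO for
+ * restore mode) must be present.
+ */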
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !((attr[IPSET_ATTR_DATA] != NULL) ^
+ (attr[IPSET_ATTR_ADT] != NULL)) ||
+ (attr[IPSET_ATTR_DATA] &&
+ !flag_nested(attr[IPSET_ATTR_DATA])) ||
+ (attr[IPSET_ATTR_ADT] &&
+ (!flag_nested(attr[IPSET_ATTR_ADT]) ||
+ !attr[IPSET_ATTR_LINENO]))))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+ if (!set)
+ return -ENOENT;
+
+ use_lineno = !!attr[IPSET_ATTR_LINENO];
+ if (attr[IPSET_ATTR_DATA]) {
+ if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+ attr[IPSET_ATTR_DATA],
+ set->type->adt_policy, NULL))
+ return -IPSET_ERR_PROTOCOL;
+ ret = call_ad(net, ctnl, skb, set, tb, adt, flags,
+ use_lineno);
+ } else {
+ int nla_rem;
+
+ nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+ if (nla_type(nla) != IPSET_ATTR_DATA ||
+ !flag_nested(nla) ||
+ nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+ set->type->adt_policy, NULL))
+ return -IPSET_ERR_PROTOCOL;
+ ret = call_ad(net, ctnl, skb, set, tb, adt,
+ flags, use_lineno);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static int ip_set_uadd(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ return ip_set_ad(info->net, info->sk, skb,
+ IPSET_ADD, info->nlh, attr, info->extack);
+}
+
+static int ip_set_udel(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ return ip_set_ad(info->net, info->sk, skb,
+ IPSET_DEL, info->nlh, attr, info->extack);
+}
+
+static int ip_set_utest(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct ip_set *set;
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
+ int ret = 0;
+ u32 lineno;
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_DATA] ||
+ !flag_nested(attr[IPSET_ATTR_DATA])))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+ if (!set)
+ return -ENOENT;
+
+ if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
+ set->type->adt_policy, NULL))
+ return -IPSET_ERR_PROTOCOL;
+
+ rcu_read_lock_bh();
+ ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
+ rcu_read_unlock_bh();
+ /* Userspace can't trigger element to be re-added */
+ if (ret == -EAGAIN)
+ ret = 1;
+
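+ /* uadt() in TEST mode returns a positive value when the element
+ * is found; map that to 0 and everything else to -IPSET_ERR_EXIST.
+ */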
+ return ret > 0 ? 0 : -IPSET_ERR_EXIST;
+}
+
+/* Get the header data of a set */
+
+static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ const struct ip_set *set;
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME]))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+ if (!set)
+ return -ENOENT;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
+ IPSET_CMD_HEADER);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
+ nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
+ nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
+ nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+/* Get type data */
+
+static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
+};
+
+static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+ u8 family, min, max;
+ const char *typename;
+ int ret = 0;
+
+ if (unlikely(protocol_min_failed(attr) ||
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_FAMILY]))
+ return -IPSET_ERR_PROTOCOL;
+
+ family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+ typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+ ret = find_set_type_minmax(typename, family, &min, &max);
+ if (ret)
+ return ret;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
+ IPSET_CMD_TYPE);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
+ nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
+ nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
+ nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+/* Get protocol version */
+
+static const struct nla_policy
+ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+};
+
+static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+
+ if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
+ return -IPSET_ERR_PROTOCOL;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
+ IPSET_CMD_PROTOCOL);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
+ goto nla_put_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+/* Get set by name or index, from userspace */
+
+static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+ ip_set_id_t id = IPSET_INVALID_ID;
+ const struct ip_set *set;
+
+ if (unlikely(protocol_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME]))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
+ if (id == IPSET_INVALID_ID)
+ return -ENOENT;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
+ IPSET_CMD_GET_BYNAME);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
+ nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_INDEX] = { .type = NLA_U16 },
+};
+
+static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
+{
+ struct ip_set_net *inst = ip_set_pernet(info->net);
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+ ip_set_id_t id = IPSET_INVALID_ID;
+ const struct ip_set *set;
+
+ if (unlikely(protocol_failed(attr) ||
+ !attr[IPSET_ATTR_INDEX]))
+ return -IPSET_ERR_PROTOCOL;
+
+ id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
+ if (id >= inst->ip_set_max)
+ return -ENOENT;
+ set = ip_set(inst, id);
+ if (set == NULL)
+ return -ENOENT;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
+ IPSET_CMD_GET_BYINDEX);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
+ [IPSET_CMD_NONE] = {
+ .call = ip_set_none,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ },
+ [IPSET_CMD_CREATE] = {
+ .call = ip_set_create,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_create_policy,
+ },
+ [IPSET_CMD_DESTROY] = {
+ .call = ip_set_destroy,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_FLUSH] = {
+ .call = ip_set_flush,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_RENAME] = {
+ .call = ip_set_rename,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname2_policy,
+ },
+ [IPSET_CMD_SWAP] = {
+ .call = ip_set_swap,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname2_policy,
+ },
+ [IPSET_CMD_LIST] = {
+ .call = ip_set_dump,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_dump_policy,
+ },
+ [IPSET_CMD_SAVE] = {
+ .call = ip_set_dump,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_ADD] = {
+ .call = ip_set_uadd,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_adt_policy,
+ },
+ [IPSET_CMD_DEL] = {
+ .call = ip_set_udel,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_adt_policy,
+ },
+ [IPSET_CMD_TEST] = {
+ .call = ip_set_utest,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_adt_policy,
+ },
+ [IPSET_CMD_HEADER] = {
+ .call = ip_set_header,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_TYPE] = {
+ .call = ip_set_type,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_type_policy,
+ },
+ [IPSET_CMD_PROTOCOL] = {
+ .call = ip_set_protocol,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_protocol_policy,
+ },
+ [IPSET_CMD_GET_BYNAME] = {
+ .call = ip_set_byname,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_GET_BYINDEX] = {
+ .call = ip_set_byindex,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_index_policy,
+ },
+};
+
+static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
+ .name = "ip_set",
+ .subsys_id = NFNL_SUBSYS_IPSET,
+ .cb_count = IPSET_MSG_MAX,
+ .cb = ip_set_netlink_subsys_cb,
+};
+
+/* Interface to iptables/ip6tables */
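+
+/* The legacy getsockopt() interface below is used by the userspace
+ * iptables/ip6tables set match and SET target helpers to translate
+ * set names to indices (and back) without speaking nfnetlink.
+ */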
+
+static int
+ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
+{
+ unsigned int *op;
+ void *data;
+ int copylen = *len, ret = 0;
+ struct net *net = sock_net(sk);
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (optval != SO_IP_SET)
+ return -EBADF;
+ if (*len < sizeof(unsigned int))
+ return -EINVAL;
+
+ data = vmalloc(*len);
+ if (!data)
+ return -ENOMEM;
+ if (copy_from_user(data, user, *len) != 0) {
+ ret = -EFAULT;
+ goto done;
+ }
+ op = data;
+
+ if (*op < IP_SET_OP_VERSION) {
+ /* Check the version at the beginning of operations */
+ struct ip_set_req_version *req_version = data;
+
+ if (*len < sizeof(struct ip_set_req_version)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ if (req_version->version < IPSET_PROTOCOL_MIN) {
+ ret = -EPROTO;
+ goto done;
+ }
+ }
+
+ switch (*op) {
+ case IP_SET_OP_VERSION: {
+ struct ip_set_req_version *req_version = data;
+
+ if (*len != sizeof(struct ip_set_req_version)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ req_version->version = IPSET_PROTOCOL;
+ if (copy_to_user(user, req_version,
+ sizeof(struct ip_set_req_version)))
+ ret = -EFAULT;
+ goto done;
+ }
+ case IP_SET_OP_GET_BYNAME: {
+ struct ip_set_req_get_set *req_get = data;
+ ip_set_id_t id;
+
+ if (*len != sizeof(struct ip_set_req_get_set)) {
+ ret = -EINVAL;
+ goto done;
+ }
+ req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ find_set_and_id(inst, req_get->set.name, &id);
+ req_get->set.index = id;
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ goto copy;
+ }
+ case IP_SET_OP_GET_FNAME: {
+ struct ip_set_req_get_set_family *req_get = data;
+ ip_set_id_t id;
+
+ if (*len != sizeof(struct ip_set_req_get_set_family)) {
+ ret = -EINVAL;
+ goto done;
+ }
+ req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ find_set_and_id(inst, req_get->set.name, &id);
+ req_get->set.index = id;
+ if (id != IPSET_INVALID_ID)
+ req_get->family = ip_set(inst, id)->family;
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ goto copy;
+ }
+ case IP_SET_OP_GET_BYINDEX: {
+ struct ip_set_req_get_set *req_get = data;
+ struct ip_set *set;
+
+ if (*len != sizeof(struct ip_set_req_get_set) ||
+ req_get->set.index >= inst->ip_set_max) {
+ ret = -EINVAL;
+ goto done;
+ }
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ set = ip_set(inst, req_get->set.index);
+ ret = strscpy(req_get->set.name, set ? set->name : "",
+ IPSET_MAXNAMELEN);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ if (ret < 0)
+ goto done;
+ goto copy;
+ }
+ default:
+ ret = -EBADMSG;
+ goto done;
+ } /* end of switch(op) */
+
+copy:
+ if (copy_to_user(user, data, copylen))
+ ret = -EFAULT;
+
+done:
+ vfree(data);
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
+static struct nf_sockopt_ops so_set __read_mostly = {
+ .pf = PF_INET,
+ .get_optmin = SO_IP_SET,
+ .get_optmax = SO_IP_SET + 1,
+ .get = ip_set_sockfn_get,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init
+ip_set_net_init(struct net *net)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set **list;
+
+ inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
+ if (inst->ip_set_max >= IPSET_INVALID_ID)
+ inst->ip_set_max = IPSET_INVALID_ID - 1;
+
+ list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+ inst->is_deleted = false;
+ inst->is_destroyed = false;
+ rcu_assign_pointer(inst->ip_set_list, list);
+ return 0;
+}
+
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ struct ip_set *set = NULL;
+ ip_set_id_t i;
+
+ inst->is_deleted = true; /* flag for ip_set_nfnl_put */
+
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = ip_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(set);
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
+}
+
+static struct pernet_operations ip_set_net_ops = {
+ .init = ip_set_net_init,
+ .exit = ip_set_net_exit,
+ .id = &ip_set_net_id,
+ .size = sizeof(struct ip_set_net),
+};
+
+static int __init
+ip_set_init(void)
+{
+ int ret = register_pernet_subsys(&ip_set_net_ops);
+
+ if (ret) {
+ pr_err("ip_set: cannot register pernet_subsys.\n");
+ return ret;
+ }
+
+ ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+ if (ret != 0) {
+ pr_err("ip_set: cannot register with nfnetlink.\n");
+ unregister_pernet_subsys(&ip_set_net_ops);
+ return ret;
+ }
+
+ ret = nf_register_sockopt(&so_set);
+ if (ret != 0) {
+ pr_err("SO_SET registry failed: %d\n", ret);
+ nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+ unregister_pernet_subsys(&ip_set_net_ops);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit
+ip_set_fini(void)
+{
+ nf_unregister_sockopt(&so_set);
+ nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+
+ unregister_pernet_subsys(&ip_set_net_ops);
+ pr_debug("these are the famous last words\n");
+}
+
+module_init(ip_set_init);
+module_exit(ip_set_fini);
+
+MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
new file mode 100644
index 000000000..36615eb3e
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Get Layer-4 data from the packets */
+
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/sctp.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/export.h>
+
+/* We must handle non-linear skbs */
+static bool
+get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
+ bool src, __be16 *port, u8 *proto)
+{
+ switch (protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
+ if (!th)
+ /* No choice either */
+ return false;
+
+ *port = src ? th->source : th->dest;
+ break;
+ }
+ case IPPROTO_SCTP: {
+ struct sctphdr _sh;
+ const struct sctphdr *sh;
+
+ sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
+ if (!sh)
+ /* No choice either */
+ return false;
+
+ *port = src ? sh->source : sh->dest;
+ break;
+ }
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE: {
+ struct udphdr _udph;
+ const struct udphdr *uh;
+
+ uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
+ if (!uh)
+ /* No choice either */
+ return false;
+
+ *port = src ? uh->source : uh->dest;
+ break;
+ }
+ case IPPROTO_ICMP: {
+ struct icmphdr _ich;
+ const struct icmphdr *ic;
+
+ ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
+ if (!ic)
+ return false;
+
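+ /* ICMP has no ports: pack type into the upper and code into
+ * the lower byte so it can be treated like a port number. */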
+ *port = (__force __be16)htons((ic->type << 8) | ic->code);
+ break;
+ }
+ case IPPROTO_ICMPV6: {
+ struct icmp6hdr _ich;
+ const struct icmp6hdr *ic;
+
+ ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
+ if (!ic)
+ return false;
+
+ *port = (__force __be16)
+ htons((ic->icmp6_type << 8) | ic->icmp6_code);
+ break;
+ }
+ default:
+ break;
+ }
+ *proto = protocol;
+
+ return true;
+}
+
+bool
+ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
+ __be16 *port, u8 *proto)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ unsigned int protooff = skb_network_offset(skb) + ip_hdrlen(skb);
+ int protocol = iph->protocol;
+
+ /* See comments at tcp_match in ip_tables.c */
+ if (protocol <= 0)
+ return false;
+
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ switch (protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_ICMP:
+ /* Port info not available for fragment offset > 0 */
+ return false;
+ default:
+ /* Other protocols don't have ports,
+ * so we can match fragments.
+ */
+ *proto = protocol;
+ return true;
+ }
+
+ return get_port(skb, protocol, protooff, src, port, proto);
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+bool
+ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
+ __be16 *port, u8 *proto)
+{
+ int protoff;
+ u8 nexthdr;
+ __be16 frag_off = 0;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb,
+ skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
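+ /* A non-zero fragment offset means a non-first fragment where
+ * the transport header is absent, so no port can be extracted. */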
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ return false;
+
+ return get_port(skb, nexthdr, protoff, src, port, proto);
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
+#endif
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
new file mode 100644
index 000000000..7499192af
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -0,0 +1,1582 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+#ifndef _IP_SET_HASH_GEN_H
+#define _IP_SET_HASH_GEN_H
+
+#include <linux/rcupdate.h>
+#include <linux/jhash.h>
+#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+#define __ipset_dereference(p) \
+ rcu_dereference_protected(p, 1)
+#define ipset_dereference_nfnl(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+#define ipset_dereference_set(p, set) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&(set)->lock))
+#define ipset_dereference_bh_nfnl(p) \
+ rcu_dereference_bh_check(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+
+/* Hashing which uses arrays to resolve clashing. The hash table is resized
+ * (doubled) when searching becomes too long.
+ * Internally jhash is used with the assumption that the size of the
+ * stored data is a multiple of sizeof(u32).
+ *
+ * Readers and resizing
+ *
+ * Resizing can be triggered by userspace command only, and those
+ * are serialized by the nfnl mutex. During resizing the set is
+ * read-locked, so the only possible concurrent operations are
+ * the kernel side readers. Those must be protected by proper RCU locking.
+ */
+
+/* Number of elements to store in an initial array block */
+#define AHASH_INIT_SIZE 2
+/* Max number of elements to store in an array block */
+#define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE)
+/* Max number of elements in the array block when tuned */
+#define AHASH_MAX_TUNED 64
+#define AHASH_MAX(h) ((h)->bucketsize)
+
+/* A hash bucket */
+struct hbucket {
+ struct rcu_head rcu; /* for call_rcu */
+ /* Which positions are used in the array */
+ DECLARE_BITMAP(used, AHASH_MAX_TUNED);
+ u8 size; /* size of the array */
+ u8 pos; /* position of the first free entry */
+ unsigned char value[] /* the array of the values */
+ __aligned(__alignof__(u64));
+};
+
+/* Region size for locking == 2^HTABLE_REGION_BITS */
+#define HTABLE_REGION_BITS 10
+#define ahash_numof_locks(htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 1 \
+ : jhash_size((htable_bits) - HTABLE_REGION_BITS))
+#define ahash_sizeof_regions(htable_bits) \
+ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
+#define ahash_region(n, htable_bits) \
+ ((n) % ahash_numof_locks(htable_bits))
+#define ahash_bucket_start(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 0 \
+ : (h) * jhash_size(HTABLE_REGION_BITS))
+#define ahash_bucket_end(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
+ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
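+
+/* A region is a block of 2^HTABLE_REGION_BITS hash buckets protected by
+ * one lock; tables smaller than a region form a single one. Flush, gc
+ * and resize walk the table region by region.
+ */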
+
+struct htable_gc {
+ struct delayed_work dwork;
+ struct ip_set *set; /* Set the gc belongs to */
+ u32 region; /* Last gc run position */
+};
+
+/* The hash table: the table size stored here in order to make resizing easy */
+struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping and gc */
+ u8 htable_bits; /* size of hash table == 2^htable_bits */
+ u32 maxelem; /* Maxelem per region */
+ struct ip_set_region *hregion; /* Region locks and ext sizes */
+ struct hbucket __rcu *bucket[]; /* hashtable buckets */
+};
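+
+/* Table lifetime: "ref" is set by the resizer when a table is being
+ * replaced, while "uref" counts concurrent users (dumping and gc).
+ * The last user dropping uref frees a table whose ref is set.
+ */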
+
+#define hbucket(h, i) ((h)->bucket[i])
+#define ext_size(n, dsize) \
+ (sizeof(struct hbucket) + (n) * (dsize))
+
+#ifndef IPSET_NET_COUNT
+#define IPSET_NET_COUNT 1
+#endif
+
+/* Book-keeping of the prefixes added to the set */
+struct net_prefixes {
+ u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
+ u8 cidr[IPSET_NET_COUNT]; /* the cidr value */
+};
+
+/* Compute the hash table size */
+static size_t
+htable_size(u8 hbits)
+{
+ size_t hsize;
+
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
+ if (hbits > 31)
+ return 0;
+ hsize = jhash_size(hbits);
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
+ < hsize)
+ return 0;
+
+ return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
+}
+
+#ifdef IP_SET_HASH_WITH_NETS
+#if IPSET_NET_COUNT > 1
+#define __CIDR(cidr, i) (cidr[i])
+#else
+#define __CIDR(cidr, i) (cidr)
+#endif
+
+/* cidr + 1 is stored in net_prefixes to support /0 */
+#define NCIDR_PUT(cidr) ((cidr) + 1)
+#define NCIDR_GET(cidr) ((cidr) - 1)
+
+#ifdef IP_SET_HASH_WITH_NETS_PACKED
+/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
+#define DCIDR_PUT(cidr) ((cidr) - 1)
+#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1)
+#else
+#define DCIDR_PUT(cidr) (cidr)
+#define DCIDR_GET(cidr, i) __CIDR(cidr, i)
+#endif
+
+#define INIT_CIDR(cidr, host_mask) \
+ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
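+
+/* Example of the encoding above: a /0 network is stored in net_prefixes
+ * as cidr 1 (NCIDR_PUT), so a zero slot can mean "empty"; with
+ * IP_SET_HASH_WITH_NETS_PACKED the data entry stores cidr - 1 so that
+ * even /128 fits the packed bit field of the element.
+ */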
+
+#ifdef IP_SET_HASH_WITH_NET0
+/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
+#define NLEN (HOST_MASK + 1)
+#define CIDR_POS(c) ((c) - 1)
+#else
+/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
+#define NLEN HOST_MASK
+#define CIDR_POS(c) ((c) - 2)
+#endif
+
+#else
+#define NLEN 0
+#endif /* IP_SET_HASH_WITH_NETS */
+
+#define SET_ELEM_EXPIRED(set, d) \
+ (SET_WITH_TIMEOUT(set) && \
+ ip_set_timeout_expired(ext_timeout(d, set)))
+
+#endif /* _IP_SET_HASH_GEN_H */
+
+#ifndef MTYPE
+#error "MTYPE is not defined!"
+#endif
+
+#ifndef HTYPE
+#error "HTYPE is not defined!"
+#endif
+
+#ifndef HOST_MASK
+#error "HOST_MASK is not defined!"
+#endif
+
+/* Family dependent templates */
+
+#undef ahash_data
+#undef mtype_data_equal
+#undef mtype_do_data_match
+#undef mtype_data_set_flags
+#undef mtype_data_reset_elem
+#undef mtype_data_reset_flags
+#undef mtype_data_netmask
+#undef mtype_data_list
+#undef mtype_data_next
+#undef mtype_elem
+
+#undef mtype_ahash_destroy
+#undef mtype_ext_cleanup
+#undef mtype_add_cidr
+#undef mtype_del_cidr
+#undef mtype_ahash_memsize
+#undef mtype_flush
+#undef mtype_destroy
+#undef mtype_same_set
+#undef mtype_kadt
+#undef mtype_uadt
+
+#undef mtype_add
+#undef mtype_del
+#undef mtype_test_cidrs
+#undef mtype_test
+#undef mtype_uref
+#undef mtype_resize
+#undef mtype_ext_size
+#undef mtype_resize_ad
+#undef mtype_head
+#undef mtype_list
+#undef mtype_gc_do
+#undef mtype_gc
+#undef mtype_gc_init
+#undef mtype_variant
+#undef mtype_data_match
+
+#undef htype
+#undef HKEY
+
+#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal)
+#ifdef IP_SET_HASH_WITH_NETS
+#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match)
+#else
+#define mtype_do_data_match(d) 1
+#endif
+#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem)
+#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
+#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
+#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+
+#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
+#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
+#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
+
+#define mtype_add IPSET_TOKEN(MTYPE, _add)
+#define mtype_del IPSET_TOKEN(MTYPE, _del)
+#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
+#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
+#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
+#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
+#define mtype_head IPSET_TOKEN(MTYPE, _head)
+#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
+#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
+#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
+
+#ifndef HKEY_DATALEN
+#define HKEY_DATALEN sizeof(struct mtype_elem)
+#endif
+
+#define htype MTYPE
+
+#define HKEY(data, initval, htable_bits) \
+({ \
+ const u32 *__k = (const u32 *)data; \
+ u32 __l = HKEY_DATALEN / sizeof(u32); \
+ \
+ BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \
+ \
+ jhash2(__k, __l, initval) & jhash_mask(htable_bits); \
+})
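+
+/* HKEY treats the element as an array of u32 words, runs jhash2() over
+ * it and masks the result down to the current table size. This is why
+ * HKEY_DATALEN must be a multiple of sizeof(u32), as enforced by the
+ * BUILD_BUG_ON above.
+ */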
+
+/* The generic hash structure */
+struct htype {
+ struct htable __rcu *table; /* the hash table */
+ struct htable_gc gc; /* gc workqueue */
+ u32 maxelem; /* max elements in the hash */
+ u32 initval; /* random jhash init value */
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask; /* mask applied to the mark value to store */
+#endif
+ u8 bucketsize; /* max elements in an array block */
+#ifdef IP_SET_HASH_WITH_NETMASK
+ u8 netmask; /* netmask value for subnets to store */
+#endif
+ struct list_head ad; /* Resize add|del backlist */
+ struct mtype_elem next; /* temporary storage for uadd */
+#ifdef IP_SET_HASH_WITH_NETS
+ struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
+#endif
+};
+
+/* ADD|DEL entries saved during resize */
+struct mtype_resize_ad {
+ struct list_head list;
+ enum ipset_adt ad; /* ADD|DEL element */
+ struct mtype_elem d; /* Element value */
+ struct ip_set_ext ext; /* Extensions for ADD */
+ struct ip_set_ext mext; /* Target extensions for ADD */
+ u32 flags; /* Flags for ADD */
+};
+
+#ifdef IP_SET_HASH_WITH_NETS
+/* Network cidr size book-keeping when the hash stores different
+ * sized networks. cidr == real cidr + 1 to support /0.
+ */
+static void
+mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
+{
+ int i, j;
+
+ spin_lock_bh(&set->lock);
+ /* Add in increasing prefix order, so larger cidr first */
+ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
+ if (j != -1) {
+ continue;
+ } else if (h->nets[i].cidr[n] < cidr) {
+ j = i;
+ } else if (h->nets[i].cidr[n] == cidr) {
+ h->nets[CIDR_POS(cidr)].nets[n]++;
+ goto unlock;
+ }
+ }
+ if (j != -1) {
+ for (; i > j; i--)
+ h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
+ }
+ h->nets[i].cidr[n] = cidr;
+ h->nets[CIDR_POS(cidr)].nets[n] = 1;
+unlock:
+ spin_unlock_bh(&set->lock);
+}
+
+static void
+mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
+{
+ u8 i, j, net_end = NLEN - 1;
+
+ spin_lock_bh(&set->lock);
+ for (i = 0; i < NLEN; i++) {
+ if (h->nets[i].cidr[n] != cidr)
+ continue;
+ h->nets[CIDR_POS(cidr)].nets[n]--;
+ if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
+ goto unlock;
+ for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
+ h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+ h->nets[j].cidr[n] = 0;
+ goto unlock;
+ }
+unlock:
+ spin_unlock_bh(&set->lock);
+}
+#endif
+
+/* Calculate the actual memory size of the set data */
+static size_t
+mtype_ahash_memsize(const struct htype *h, const struct htable *t)
+{
+ return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
+}
+
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize) \
+ ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+static void
+mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
+{
+ int i;
+
+ for (i = 0; i < n->pos; i++)
+ if (test_bit(i, n->used))
+ ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+}
+
+/* Flush a hash type of set: destroy all elements */
+static void
+mtype_flush(struct ip_set *set)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+ struct hbucket *n;
+ u32 r, i;
+
+ t = ipset_dereference_nfnl(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ }
+ t->hregion[r].ext_size = 0;
+ t->hregion[r].elements = 0;
+ spin_unlock_bh(&t->hregion[r].lock);
+ }
+#ifdef IP_SET_HASH_WITH_NETS
+ memset(h->nets, 0, sizeof(h->nets));
+#endif
+}
+
+/* Destroy the hashtable part of the set */
+static void
+mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
+{
+ struct hbucket *n;
+ u32 i;
+
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ kfree(n);
+ }
+
+ ip_set_free(t->hregion);
+ ip_set_free(t);
+}
+
+/* Destroy a hash type of set */
+static void
+mtype_destroy(struct ip_set *set)
+{
+ struct htype *h = set->data;
+ struct list_head *l, *lt;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+
+ mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
+ }
+ kfree(h);
+
+ set->data = NULL;
+}
+
+static bool
+mtype_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+ const struct htype *x = a->data;
+ const struct htype *y = b->data;
+
+ /* Resizing changes htable_bits, so we ignore it */
+ return x->maxelem == y->maxelem &&
+ a->timeout == b->timeout &&
+#ifdef IP_SET_HASH_WITH_NETMASK
+ x->netmask == y->netmask &&
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ x->markmask == y->markmask &&
+#endif
+ a->extensions == b->extensions;
+}
+
+static void
+mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
+{
+ struct hbucket *n, *tmp;
+ struct mtype_elem *data;
+ u32 i, j, d;
+ size_t dsize = set->dsize;
+#ifdef IP_SET_HASH_WITH_NETS
+ u8 k;
+#endif
+ u8 htable_bits = t->htable_bits;
+
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, htable_bits);
+ i < ahash_bucket_end(r, htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used)) {
+ d++;
+ continue;
+ }
+ data = ahash_data(n, j, dsize);
+ if (!ip_set_timeout_expired(ext_timeout(data, set)))
+ continue;
+ pr_debug("expired %u/%u\n", i, j);
+ clear_bit(j, n->used);
+ smp_mb__after_atomic();
+#ifdef IP_SET_HASH_WITH_NETS
+ for (k = 0; k < IPSET_NET_COUNT; k++)
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(data->cidr, k)),
+ k);
+#endif
+ t->hregion[r].elements--;
+ ip_set_ext_destroy(set, data);
+ d++;
+ }
+ if (d >= AHASH_INIT_SIZE) {
+ if (d >= n->size) {
+ t->hregion[r].ext_size -=
+ ext_size(n->size, dsize);
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ continue;
+ }
+ tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
+ if (!tmp)
+ /* Still try to delete expired elements. */
+ continue;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + d * dsize,
+ data, dsize);
+ set_bit(d, tmp->used);
+ d++;
+ }
+ tmp->pos = d;
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
+ rcu_assign_pointer(hbucket(t, i), tmp);
+ kfree_rcu(n, rcu);
+ }
+ }
+ spin_unlock_bh(&t->hregion[r].lock);
+}
+
+static void
+mtype_gc(struct work_struct *work)
+{
+ struct htable_gc *gc;
+ struct ip_set *set;
+ struct htype *h;
+ struct htable *t;
+ u32 r, numof_locks;
+ unsigned int next_run;
+
+ gc = container_of(work, struct htable_gc, dwork.work);
+ set = gc->set;
+ h = set->data;
+
+ spin_lock_bh(&set->lock);
+ t = ipset_dereference_set(h->table, set);
+ atomic_inc(&t->uref);
+ numof_locks = ahash_numof_locks(t->htable_bits);
+ r = gc->region++;
+ if (r >= numof_locks)
+ r = gc->region = 0;
+ next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
+ if (next_run < HZ/10)
+ next_run = HZ/10;
+ spin_unlock_bh(&set->lock);
+
+ mtype_gc_do(set, h, t, r);
+
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by expire: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
+}
+
+static void
+mtype_gc_init(struct htable_gc *gc)
+{
+ INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
+}
+
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+
+/* Resize a hash: create a new hash table, doubling the hashsize,
+ * and insert the elements into it. Repeat until we succeed or
+ * fail due to memory pressure.
+ */
+static int
+mtype_resize(struct ip_set *set, bool retried)
+{
+ struct htype *h = set->data;
+ struct htable *t, *orig;
+ u8 htable_bits;
+ size_t hsize, dsize = set->dsize;
+#ifdef IP_SET_HASH_WITH_NETS
+ u8 flags;
+ struct mtype_elem *tmp;
+#endif
+ struct mtype_elem *data;
+ struct mtype_elem *d;
+ struct hbucket *n, *m;
+ struct list_head *l, *lt;
+ struct mtype_resize_ad *x;
+ u32 i, j, r, nr, key;
+ int ret;
+
+#ifdef IP_SET_HASH_WITH_NETS
+ tmp = kmalloc(dsize, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+#endif
+ orig = ipset_dereference_bh_nfnl(h->table);
+ htable_bits = orig->htable_bits;
+
+retry:
+ ret = 0;
+ htable_bits++;
+ if (!htable_bits)
+ goto hbwarn;
+ hsize = htable_size(htable_bits);
+ if (!hsize)
+ goto hbwarn;
+ t = ip_set_alloc(hsize);
+ if (!t) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
+ if (!t->hregion) {
+ ip_set_free(t);
+ ret = -ENOMEM;
+ goto out;
+ }
+ t->htable_bits = htable_bits;
+ t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
+ for (i = 0; i < ahash_numof_locks(htable_bits); i++)
+ spin_lock_init(&t->hregion[i].lock);
+
+ /* There can't be another resize running in parallel,
+ * but dumping, gc and kernel-side add/del are possible
+ */
+ orig = ipset_dereference_bh_nfnl(h->table);
+ atomic_set(&orig->ref, 1);
+ atomic_inc(&orig->uref);
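+ /* ref marks the old table as scheduled for destruction by the
+ * resizer, while uref counts the temporary users (dump, gc,
+ * kernel-side add/del): the last user dropping uref with ref
+ * set frees the table.
+ */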
+ pr_debug("attempt to resize set %s from %u to %u, t %p\n",
+ set->name, orig->htable_bits, htable_bits, orig);
+ for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock_bh();
+ for (i = ahash_bucket_start(r, orig->htable_bits);
+ i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(orig, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ if (SET_ELEM_EXPIRED(set, data))
+ continue;
+#ifdef IP_SET_HASH_WITH_NETS
+ /* We have readers running in parallel with us,
+ * so the live data cannot be modified.
+ */
+ flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
+ mtype_data_reset_flags(data, &flags);
+#endif
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference(hbucket(t, key));
+ nr = ahash_region(key, htable_bits);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
+ AHASH_INIT_SIZE * dsize,
+ GFP_ATOMIC);
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
+ (m->size + AHASH_INIT_SIZE)
+ * dsize,
+ GFP_ATOMIC);
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
+ }
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
+ t->hregion[nr].elements++;
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_data_reset_flags(d, &flags);
+#endif
+ }
+ }
+ rcu_read_unlock_bh();
+ }
+
+ /* There can't be any other writer. */
+ rcu_assign_pointer(h->table, t);
+
+ /* Give time to other readers of the set */
+ synchronize_rcu();
+
+ pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
+ orig->htable_bits, orig, t->htable_bits, t);
+ /* Add/delete elements processed by the SET target during resize.
+ * Kernel-side add cannot trigger a resize and userspace actions
+ * are serialized by the mutex.
+ */
+ list_for_each_safe(l, lt, &h->ad) {
+ x = list_entry(l, struct mtype_resize_ad, list);
+ if (x->ad == IPSET_ADD) {
+ mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
+ } else {
+ mtype_del(set, &x->d, NULL, NULL, 0);
+ }
+ list_del(l);
+ kfree(l);
+ }
+ /* If there's nobody else using the table, destroy it */
+ if (atomic_dec_and_test(&orig->uref)) {
+ pr_debug("Table destroy by resize %p\n", orig);
+ mtype_ahash_destroy(set, orig, false);
+ }
+
+out:
+#ifdef IP_SET_HASH_WITH_NETS
+ kfree(tmp);
+#endif
+ return ret;
+
+cleanup:
+ rcu_read_unlock_bh();
+ atomic_set(&orig->ref, 0);
+ atomic_dec(&orig->uref);
+ mtype_ahash_destroy(set, t, false);
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out;
+
+hbwarn:
+ /* In case we have plenty of memory :-) */
+ pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
+ ret = -IPSET_ERR_HASH_FULL;
+ goto out;
+}
+
+/* Get the current number of elements and ext_size in the set */
+static void
+mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ u32 i, j, r;
+ struct hbucket *n;
+ struct mtype_elem *data;
+
+ t = rcu_dereference_bh(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, set->dsize);
+ if (!SET_ELEM_EXPIRED(set, data))
+ (*elements)++;
+ }
+ }
+ *ext_size += t->hregion[r].ext_size;
+ }
+}
+
+/* Add an element to a hash and update the internal counters on
+ * success, otherwise report the proper error code.
+ */
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+ const struct mtype_elem *d = value;
+ struct mtype_elem *data;
+ struct hbucket *n, *old = ERR_PTR(-ENOENT);
+ int i, j = -1, ret;
+ bool flag_exist = flags & IPSET_FLAG_EXIST;
+ bool deleted = false, forceadd = false, reuse = false;
+ u32 r, key, multi = 0, elements, maxelem;
+
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ elements = t->hregion[r].elements;
+ maxelem = t->maxelem;
+ if (elements >= maxelem) {
+ u32 e;
+ if (SET_WITH_TIMEOUT(set)) {
+ rcu_read_unlock_bh();
+ mtype_gc_do(set, h, t, r);
+ rcu_read_lock_bh();
+ }
+ maxelem = h->maxelem;
+ elements = 0;
+ for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
+ elements += t->hregion[e].elements;
+ if (elements >= maxelem && SET_WITH_FORCEADD(set))
+ forceadd = true;
+ }
+ rcu_read_unlock_bh();
+
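+ /* The pass above ran lock-free only to estimate the fill level
+ * (and to let the GC make room in sets with timeout); the
+ * actual insertion happens under the region lock below.
+ */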
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n) {
+ if (forceadd || elements >= maxelem)
+ goto set_full;
+ old = NULL;
+ n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
+ GFP_ATOMIC);
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ n->size = AHASH_INIT_SIZE;
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
+ goto copy_elem;
+ }
+ for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ /* Reuse first deleted entry */
+ if (j == -1) {
+ deleted = reuse = true;
+ j = i;
+ }
+ continue;
+ }
+ data = ahash_data(n, i, set->dsize);
+ if (mtype_data_equal(data, d, &multi)) {
+ if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
+ /* Just the extensions could be overwritten */
+ j = i;
+ goto overwrite_extensions;
+ }
+ ret = -IPSET_ERR_EXIST;
+ goto unlock;
+ }
+ /* Reuse first timed-out entry */
+ if (SET_ELEM_EXPIRED(set, data) && j == -1) {
+ j = i;
+ reuse = true;
+ }
+ }
+ if (reuse || forceadd) {
+ if (j == -1)
+ j = 0;
+ data = ahash_data(n, j, set->dsize);
+ if (!deleted) {
+#ifdef IP_SET_HASH_WITH_NETS
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(data->cidr, i)),
+ i);
+#endif
+ ip_set_ext_destroy(set, data);
+ t->hregion[r].elements--;
+ }
+ goto copy_data;
+ }
+ if (elements >= maxelem)
+ goto set_full;
+ /* Create a new slot */
+ if (n->pos >= n->size) {
+#ifdef IP_SET_HASH_WITH_MULTI
+ if (h->bucketsize >= AHASH_MAX_TUNED)
+ goto set_full;
+ else if (h->bucketsize <= multi)
+ h->bucketsize += AHASH_INIT_SIZE;
+#endif
+ if (n->size >= AHASH_MAX(h)) {
+ /* Trigger rehashing */
+ mtype_data_next(&h->next, d);
+ ret = -EAGAIN;
+ goto resize;
+ }
+ old = n;
+ n = kzalloc(sizeof(*n) +
+ (old->size + AHASH_INIT_SIZE) * set->dsize,
+ GFP_ATOMIC);
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ memcpy(n, old, sizeof(struct hbucket) +
+ old->size * set->dsize);
+ n->size = old->size + AHASH_INIT_SIZE;
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
+ }
+
+copy_elem:
+ j = n->pos++;
+ data = ahash_data(n, j, set->dsize);
+copy_data:
+ t->hregion[r].elements++;
+#ifdef IP_SET_HASH_WITH_NETS
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
+#endif
+ memcpy(data, d, sizeof(struct mtype_elem));
+overwrite_extensions:
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_data_set_flags(data, flags);
+#endif
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(data, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(set, ext_comment(data, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
+ /* Must come last for the case when a timed-out entry is reused */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
+ smp_mb__before_atomic();
+ set_bit(j, n->used);
+ if (old != ERR_PTR(-ENOENT)) {
+ rcu_assign_pointer(hbucket(t, key), n);
+ if (old)
+ kfree_rcu(old, rcu);
+ }
+ ret = 0;
+resize:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in progress and this is a kernel-side add: save the values */
+ struct mtype_resize_ad *x;
+
+ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
+ if (!x)
+ /* Don't bother */
+ goto out;
+ x->ad = IPSET_ADD;
+ memcpy(&x->d, value, sizeof(struct mtype_elem));
+ memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
+ memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
+ x->flags = flags;
+ spin_lock_bh(&set->lock);
+ list_add_tail(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ goto out;
+
+set_full:
+ if (net_ratelimit())
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, maxelem);
+ ret = -IPSET_ERR_HASH_FULL;
+unlock:
+ spin_unlock_bh(&t->hregion[r].lock);
+out:
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by add: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
+}
+
+/* Delete an element from the hash and free up space if possible.
+ */
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+ const struct mtype_elem *d = value;
+ struct mtype_elem *data;
+ struct hbucket *n;
+ struct mtype_resize_ad *x = NULL;
+ int i, j, k, r, ret = -IPSET_ERR_EXIST;
+ u32 key, multi = 0;
+ size_t dsize = set->dsize;
+
+ /* Userspace add and resize are excluded by the mutex.
+ * Kernelspace add does not trigger a resize.
+ */
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ rcu_read_unlock_bh();
+
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n)
+ goto out;
+ for (i = 0, k = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ k++;
+ continue;
+ }
+ data = ahash_data(n, i, dsize);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ if (SET_ELEM_EXPIRED(set, data))
+ goto out;
+
+ ret = 0;
+ clear_bit(i, n->used);
+ smp_mb__after_atomic();
+ if (i + 1 == n->pos)
+ n->pos--;
+ t->hregion[r].elements--;
+#ifdef IP_SET_HASH_WITH_NETS
+ for (j = 0; j < IPSET_NET_COUNT; j++)
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
+#endif
+ ip_set_ext_destroy(set, data);
+
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in progress and this is a kernel-side
+ * del: save the values
+ */
+ x = kzalloc(sizeof(struct mtype_resize_ad),
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, value,
+ sizeof(struct mtype_elem));
+ x->flags = flags;
+ }
+ }
+ for (; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ k++;
+ }
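+ /* Bucket maintenance: free the bucket when it became empty,
+ * or shrink it by AHASH_INIT_SIZE when enough slots are
+ * unused; the replacement is published via RCU.
+ */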
+ if (n->pos == 0 && k == 0) {
+ t->hregion[r].ext_size -= ext_size(n->size, dsize);
+ rcu_assign_pointer(hbucket(t, key), NULL);
+ kfree_rcu(n, rcu);
+ } else if (k >= AHASH_INIT_SIZE) {
+ struct hbucket *tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
+ if (!tmp)
+ goto out;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, k = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + k * dsize, data, dsize);
+ set_bit(k, tmp->used);
+ k++;
+ }
+ tmp->pos = k;
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
+ rcu_assign_pointer(hbucket(t, key), tmp);
+ kfree_rcu(n, rcu);
+ }
+ goto out;
+ }
+
+out:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (x) {
+ spin_lock_bh(&set->lock);
+ list_add(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by del: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
+}
+
+static int
+mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, struct ip_set *set, u32 flags)
+{
+ if (!ip_set_match_extensions(set, ext, mext, flags, data))
+ return 0;
+ /* nomatch entries return -ENOTEMPTY */
+ return mtype_do_data_match(data);
+}
+
+#ifdef IP_SET_HASH_WITH_NETS
+/* Special test function which takes into account the different network
+ * sizes added to the set
+ */
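+/* e.g. if the set holds /24 and /16 entries, a plain address is
+ * hashed once masked to 24 and once masked to 16 bits, so one
+ * bucket is probed per distinct prefix length present in the set.
+ */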
+static int
+mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t = rcu_dereference_bh(h->table);
+ struct hbucket *n;
+ struct mtype_elem *data;
+#if IPSET_NET_COUNT == 2
+ struct mtype_elem orig = *d;
+ int ret, i, j = 0, k;
+#else
+ int ret, i, j = 0;
+#endif
+ u32 key, multi = 0;
+
+ pr_debug("test by nets\n");
+ for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
+#if IPSET_NET_COUNT == 2
+ mtype_data_reset_elem(d, &orig);
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
+ for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
+ k++) {
+ mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
+ true);
+#else
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
+#endif
+ key = HKEY(d, h->initval, t->htable_bits);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n)
+ continue;
+ for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
+ data = ahash_data(n, i, set->dsize);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
+ return ret;
+#ifdef IP_SET_HASH_WITH_MULTI
+ /* No match, reset multiple match flag */
+ multi = 0;
+#endif
+ }
+#if IPSET_NET_COUNT == 2
+ }
+#endif
+ }
+ return 0;
+}
+#endif
+
+/* Test whether the element is added to the set */
+static int
+mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+ struct mtype_elem *d = value;
+ struct hbucket *n;
+ struct mtype_elem *data;
+ int i, ret = 0;
+ u32 key, multi = 0;
+
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+#ifdef IP_SET_HASH_WITH_NETS
+ /* If we test an IP address and not a network address,
+ * try all possible network sizes
+ */
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ if (DCIDR_GET(d->cidr, i) != HOST_MASK)
+ break;
+ if (i == IPSET_NET_COUNT) {
+ ret = mtype_test_cidrs(set, d, ext, mext, flags);
+ goto out;
+ }
+#endif
+
+ key = HKEY(d, h->initval, t->htable_bits);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n) {
+ ret = 0;
+ goto out;
+ }
+ for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
+ data = ahash_data(n, i, set->dsize);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
+ goto out;
+ }
+out:
+ rcu_read_unlock_bh();
+ return ret;
+}
+
+/* Reply to a HEADER request: fill out the header part of the set */
+static int
+mtype_head(struct ip_set *set, struct sk_buff *skb)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ struct nlattr *nested;
+ size_t memsize;
+ u32 elements = 0;
+ size_t ext_size = 0;
+ u8 htable_bits;
+
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ mtype_ext_size(set, &elements, &ext_size);
+ memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
+ htable_bits = t->htable_bits;
+ rcu_read_unlock_bh();
+
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
+ htonl(jhash_size(htable_bits))) ||
+ nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
+ goto nla_put_failure;
+#ifdef IP_SET_HASH_WITH_NETMASK
+ if (h->netmask != HOST_MASK &&
+ nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
+ goto nla_put_failure;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
+ goto nla_put_failure;
+#endif
+ if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) {
+ if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) ||
+ nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval)))
+ goto nla_put_failure;
+ }
+ if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+
+ return 0;
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+/* Make it possible to run dumping in parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = ipset_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
+ rcu_read_unlock_bh();
+ } else if (cb->args[IPSET_CB_PRIVATE]) {
+ t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize "
+ " by dump: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
+/* Reply to a LIST/SAVE request: dump the elements of the specified set */
+static int
+mtype_list(const struct ip_set *set,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct htable *t;
+ struct nlattr *atd, *nested;
+ const struct hbucket *n;
+ const struct mtype_elem *e;
+ u32 first = cb->args[IPSET_CB_ARG0];
+ /* We assume that one hash bucket fits into one page */
+ void *incomplete;
+ int i, ret = 0;
+
+ atd = nla_nest_start(skb, IPSET_ATTR_ADT);
+ if (!atd)
+ return -EMSGSIZE;
+
+ pr_debug("list hash set %s\n", set->name);
+ t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock();
+ for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
+ cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
+ incomplete = skb_tail_pointer(skb);
+ n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
+ pr_debug("cb->arg bucket: %lu, t %p n %p\n",
+ cb->args[IPSET_CB_ARG0], t, n);
+ if (!n)
+ continue;
+ for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
+ e = ahash_data(n, i, set->dsize);
+ if (SET_ELEM_EXPIRED(set, e))
+ continue;
+ pr_debug("list hash %lu hbucket %p i %u, data %p\n",
+ cb->args[IPSET_CB_ARG0], n, i, e);
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested) {
+ if (cb->args[IPSET_CB_ARG0] == first) {
+ nla_nest_cancel(skb, atd);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ goto nla_put_failure;
+ }
+ if (mtype_data_list(skb, e))
+ goto nla_put_failure;
+ if (ip_set_put_extensions(skb, set, e, true))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+ }
+ }
+ nla_nest_end(skb, atd);
+ /* Set listing finished */
+ cb->args[IPSET_CB_ARG0] = 0;
+
+ goto out;
+
+nla_put_failure:
+ nlmsg_trim(skb, incomplete);
+ if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
+ pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
+ set->name);
+ cb->args[IPSET_CB_ARG0] = 0;
+ ret = -EMSGSIZE;
+ } else {
+ nla_nest_end(skb, atd);
+ }
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static int
+IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt);
+
+static int
+IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags,
+ bool retried);
+
+static const struct ip_set_type_variant mtype_variant = {
+ .kadt = mtype_kadt,
+ .uadt = mtype_uadt,
+ .adt = {
+ [IPSET_ADD] = mtype_add,
+ [IPSET_DEL] = mtype_del,
+ [IPSET_TEST] = mtype_test,
+ },
+ .destroy = mtype_destroy,
+ .flush = mtype_flush,
+ .head = mtype_head,
+ .list = mtype_list,
+ .uref = mtype_uref,
+ .resize = mtype_resize,
+ .same_set = mtype_same_set,
+ .region_lock = true,
+};
+
+#ifdef IP_SET_EMIT_CREATE
+static int
+IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
+ struct nlattr *tb[], u32 flags)
+{
+ u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask;
+#endif
+ u8 hbits;
+#ifdef IP_SET_HASH_WITH_NETMASK
+ u8 netmask;
+#endif
+ size_t hsize;
+ struct htype *h;
+ struct htable *t;
+ u32 i;
+
+ pr_debug("Create set %s with family %s\n",
+ set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
+
+#ifdef IP_SET_PROTO_UNDEF
+ if (set->family != NFPROTO_UNSPEC)
+ return -IPSET_ERR_INVALID_FAMILY;
+#else
+ if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
+ return -IPSET_ERR_INVALID_FAMILY;
+#endif
+
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ /* Separate condition to avoid a preprocessor directive in the argument list */
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
+ return -IPSET_ERR_PROTOCOL;
+
+ markmask = 0xffffffff;
+ if (tb[IPSET_ATTR_MARKMASK]) {
+ markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
+ if (markmask == 0)
+ return -IPSET_ERR_INVALID_MARKMASK;
+ }
+#endif
+
+#ifdef IP_SET_HASH_WITH_NETMASK
+ netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
+ if (tb[IPSET_ATTR_NETMASK]) {
+ netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+ if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
+ (set->family == NFPROTO_IPV6 && netmask > 128) ||
+ netmask == 0)
+ return -IPSET_ERR_INVALID_NETMASK;
+ }
+#endif
+
+ if (tb[IPSET_ATTR_HASHSIZE]) {
+ hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+ if (hashsize < IPSET_MIMINAL_HASHSIZE)
+ hashsize = IPSET_MIMINAL_HASHSIZE;
+ }
+
+ if (tb[IPSET_ATTR_MAXELEM])
+ maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+ hsize = sizeof(*h);
+ h = kzalloc(hsize, GFP_KERNEL);
+ if (!h)
+ return -ENOMEM;
+
+ /* Compute htable_bits from the user input parameter hashsize:
+ * if hashsize is not a power of two, round it up to the next
+ * 2^n value.
+ */
+ hbits = fls(hashsize - 1);
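+ /* e.g. hashsize 1000: fls(999) = 10, so the table gets
+ * 2^10 = 1024 buckets
+ */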
+ hsize = htable_size(hbits);
+ if (hsize == 0) {
+ kfree(h);
+ return -ENOMEM;
+ }
+ t = ip_set_alloc(hsize);
+ if (!t) {
+ kfree(h);
+ return -ENOMEM;
+ }
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
+ if (!t->hregion) {
+ ip_set_free(t);
+ kfree(h);
+ return -ENOMEM;
+ }
+ h->gc.set = set;
+ for (i = 0; i < ahash_numof_locks(hbits); i++)
+ spin_lock_init(&t->hregion[i].lock);
+ h->maxelem = maxelem;
+#ifdef IP_SET_HASH_WITH_NETMASK
+ h->netmask = netmask;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ h->markmask = markmask;
+#endif
+ if (tb[IPSET_ATTR_INITVAL])
+ h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
+ else
+ get_random_bytes(&h->initval, sizeof(h->initval));
+ h->bucketsize = AHASH_MAX_SIZE;
+ if (tb[IPSET_ATTR_BUCKETSIZE]) {
+ h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
+ if (h->bucketsize < AHASH_INIT_SIZE)
+ h->bucketsize = AHASH_INIT_SIZE;
+ else if (h->bucketsize > AHASH_MAX_SIZE)
+ h->bucketsize = AHASH_MAX_SIZE;
+ else if (h->bucketsize % 2)
+ h->bucketsize += 1;
+ }
+ t->htable_bits = hbits;
+ t->maxelem = h->maxelem / ahash_numof_locks(hbits);
+ RCU_INIT_POINTER(h->table, t);
+
+ INIT_LIST_HEAD(&h->ad);
+ set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
+ if (set->family == NFPROTO_IPV4) {
+#endif
+ set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
+ } else {
+ set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ }
+#endif
+ set->timeout = IPSET_NO_TIMEOUT;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
+ if (set->family == NFPROTO_IPV4)
+#endif
+ IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
+#ifndef IP_SET_PROTO_UNDEF
+ else
+ IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
+#endif
+ }
+ pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+ set->name, jhash_size(t->htable_bits),
+ t->htable_bits, h->maxelem, set->data, t);
+
+ return 0;
+}
+#endif /* IP_SET_EMIT_CREATE */
+
+#undef HKEY_DATALEN
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
new file mode 100644
index 000000000..24adcdd7a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:ip type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counters support */
+/* 2 Comments support */
+/* 3 Forceadd support */
+/* 4 skbinfo support */
+#define IPSET_TYPE_REV_MAX 5 /* bucketsize, initval support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip");
+
+/* Type specific function prefix */
+#define HTYPE hash_ip
+#define IP_SET_HASH_WITH_NETMASK
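+/* Illustrative userspace usage (via the ipset(8) tool):
+ *	ipset create foo hash:ip netmask 24 timeout 600
+ *	ipset add foo 192.168.1.17
+ * stores the masked network 192.168.1.0 as a single element.
+ */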
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ip4_elem {
+ /* Zero-valued IP addresses cannot be stored */
+ __be32 ip;
+};
+
+/* Common functions */
+
+static bool
+hash_ip4_data_equal(const struct hash_ip4_elem *e1,
+ const struct hash_ip4_elem *e2,
+ u32 *multi)
+{
+ return e1->ip == e2->ip;
+}
+
+static bool
+hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
+{
+ next->ip = e->ip;
+}
+
+#define MTYPE hash_ip4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ip4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip4_elem e = { 0 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ __be32 ip;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
+ ip &= ip_set_netmask(h->netmask);
+ if (ip == 0)
+ return -EINVAL;
+
+ e.ip = ip;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_ip4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip4_elem e = { 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ ip &= ip_set_hostmask(h->netmask);
+ e.ip = htonl(ip);
+ if (e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ if (adt == IPSET_TEST)
+ return adtfn(set, &e, &ext, &ext, flags);
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
+ swap(ip, ip_to);
+ }
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
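+ /* e.g. netmask 24: hosts = 2 << 7 = 256, so the loop below
+ * steps through the range one /24 network at a time
+ */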
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; i++) {
+ e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ip += hosts;
+ if (ip == 0)
+ return 0;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+/* Member elements */
+struct hash_ip6_elem {
+ union nf_inet_addr ip;
+};
+
+/* Common functions */
+
+static bool
+hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
+ const struct hash_ip6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
+}
+
+static void
+hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+{
+ ip6_netmask(ip, prefix);
+}
+
+static bool
+hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
+{
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_ip6
+#define HOST_MASK 128
+
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ip6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip6_elem e = { { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ hash_ip6_netmask(&e.ip, h->netmask);
+ if (ipv6_addr_any(&e.ip.in6))
+ return -EINVAL;
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ip6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip6_elem e = { { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ hash_ip6_netmask(&e.ip, h->netmask);
+ if (ipv6_addr_any(&e.ip.in6))
+ return -IPSET_ERR_HASH_ELEM;
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_ip_type __read_mostly = {
+ .name = "hash:ip",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ip_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ip_init(void)
+{
+ return ip_set_type_register(&hash_ip_type);
+}
+
+static void __exit
+hash_ip_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_ip_type);
+}
+
+module_init(hash_ip_init);
+module_exit(hash_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
new file mode 100644
index 000000000..467c59a83
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com>
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
+IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipmac
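+/* Illustrative userspace usage (via the ipset(8) tool):
+ *	ipset create foo hash:ip,mac
+ *	ipset add foo 192.168.1.1,12:34:56:78:9a:bc
+ */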
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmac4_elem {
+ /* Zero-valued IP addresses cannot be stored */
+ __be32 ip;
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static bool
+hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
+ const struct hash_ipmac4_elem *e2,
+ u32 *multi)
+{
+ return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) ||
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
+ const struct hash_ipmac4_elem *e)
+{
+ next->ip = e->ip;
+}
+
+#define MTYPE hash_ipmac4
+#define PF 4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_ipmac4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ if (opt->flags & IPSET_DIM_TWO_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (is_zero_ether_addr(e.ether))
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+/* IPv6 variant */
+
+/* Member elements */
+struct hash_ipmac6_elem {
+ /* Zero-valued IP addresses cannot be stored */
+ union nf_inet_addr ip;
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static bool
+hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
+ const struct hash_ipmac6_elem *e2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) &&
+ ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
+ const struct hash_ipmac6_elem *e)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE hash_ipmac6
+#define PF 6
+#define HOST_MASK 128
+#define HKEY_DATALEN sizeof(struct hash_ipmac6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac6_elem e = {
+ { .all = { 0 } },
+ { .foo[0] = 0, .foo[1] = 0 }
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ if (opt->flags & IPSET_DIM_TWO_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmac6_elem e = {
+ { .all = { 0 } },
+ { .foo[0] = 0, .foo[1] = 0 }
+ };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (is_zero_ether_addr(e.ether))
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_ipmac_type __read_mostly = {
+ .name = "hash:ip,mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ipmac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipmac_init(void)
+{
+ return ip_set_type_register(&hash_ipmac_type);
+}
+
+static void __exit
+hash_ipmac_fini(void)
+{
+ ip_set_type_unregister(&hash_ipmac_type);
+}
+
+module_init(hash_ipmac_init);
+module_exit(hash_ipmac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
new file mode 100644
index 000000000..a22ec1a6f
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:ip,mark type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Forceadd support */
+/* 2 skbinfo support */
+#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
+IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mark");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipmark
+#define IP_SET_HASH_WITH_MARKMASK
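+/* Illustrative userspace usage (via the ipset(8) tool):
+ *	ipset create foo hash:ip,mark markmask 0xff
+ *	ipset add foo 192.168.1.1,0x40
+ * where the stored mark is the given value masked with markmask.
+ */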
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmark4_elem {
+ __be32 ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static bool
+hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
+ const struct hash_ipmark4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark4_data_list(struct sk_buff *skb,
+ const struct hash_ipmark4_elem *data)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark4_elem *d)
+{
+ next->ip = d->ip;
+}
+
+#define MTYPE hash_ipmark4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_ipmark4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip = ntohl(e.ip);
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
+ swap(ip, ip_to);
+ }
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; ip++, i++) {
+ e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipmark6_elem {
+ union nf_inet_addr ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static bool
+hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
+ const struct hash_ipmark6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark6_data_list(struct sk_buff *skb,
+ const struct hash_ipmark6_elem *data)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
+ const struct hash_ipmark6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_ipmark6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipmark6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+
+ if (adt == IPSET_TEST) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ return 0;
+}
+
+static struct ip_set_type hash_ipmark_type __read_mostly = {
+ .name = "hash:ip,mark",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MARK,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ipmark_create,
+ .create_policy = {
+ [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_MARK] = { .type = NLA_U32 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipmark_init(void)
+{
+ return ip_set_type_register(&hash_ipmark_type);
+}
+
+static void __exit
+hash_ipmark_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_ipmark_type);
+}
+
+module_init(hash_ipmark_init);
+module_exit(hash_ipmark_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
new file mode 100644
index 000000000..10481760a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:ip,port type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Counters support added */
+/* 3 Comments support added */
+/* 4 Forceadd support added */
+/* 5 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,port");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipport
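+/* Illustrative userspace usage (via the ipset(8) tool):
+ *	ipset create foo hash:ip,port
+ *	ipset add foo 192.168.1.1,tcp:80
+ */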
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipport4_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
+
+/* Common functions */
+
+static bool
+hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
+ const struct hash_ipport4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static bool
+hash_ipport4_data_list(struct sk_buff *skb,
+ const struct hash_ipport4_elem *data)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipport4_data_next(struct hash_ipport4_elem *next,
+ const struct hash_ipport4_elem *d)
+{
+ next->ip = d->ip;
+ next->port = d->port;
+}
+
+#define MTYPE hash_ipport4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipport4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_ipport4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipport4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
+ tb[IPSET_ATTR_PORT_TO])) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip = ntohl(e.ip);
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ port_to = port = ntohs(e.port);
+ if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+ }
+
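+ /* Walk the whole IP x port range. After IPSET_MAX_RANGE elements we
+  * save the position in h->next and return -ERANGE, so the core drops
+  * the set lock and calls us again with retried == true to resume.
+  */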
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; ip++) {
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
+ for (; p <= port_to; p++, i++) {
+ e.ip = htonl(ip);
+ e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipport6_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
+
+/* Common functions */
+
+static bool
+hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
+ const struct hash_ipport6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static bool
+hash_ipport6_data_list(struct sk_buff *skb,
+ const struct hash_ipport6_elem *data)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipport6_data_next(struct hash_ipport6_elem *next,
+ const struct hash_ipport6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_ipport6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipport6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_ipport_type __read_mostly = {
+ .name = "hash:ip,port",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ipport_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipport_init(void)
+{
+ return ip_set_type_register(&hash_ipport_type);
+}
+
+static void __exit
+hash_ipport_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_ipport_type);
+}
+
+module_init(hash_ipport_init);
+module_exit(hash_ipport_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
new file mode 100644
index 000000000..39a01934b
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,ip type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Counters support added */
+/* 3 Comments support added */
+/* 4 Forceadd support added */
+/* 5 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,port,ip");
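+
+/* Illustrative userspace usage (ipset(8), not part of this module):
+ *   ipset create foo hash:ip,port,ip
+ *   ipset add foo 192.168.1.1,tcp:80,10.0.0.1
+ */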
+
+/* Type specific function prefix */
+#define HTYPE hash_ipportip
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipportip4_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
+
+static bool
+hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
+ const struct hash_ipportip4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->ip2 == ip2->ip2 &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static bool
+hash_ipportip4_data_list(struct sk_buff *skb,
+ const struct hash_ipportip4_elem *data)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
+ const struct hash_ipportip4_elem *d)
+{
+ next->ip = d->ip;
+ next->port = d->port;
+}
+
+/* Common functions */
+#define MTYPE hash_ipportip4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportip4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_ipportip4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportip4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
+ tb[IPSET_ATTR_PORT_TO])) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip = ntohl(e.ip);
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ port_to = port = ntohs(e.port);
+ if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; ip++) {
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
+ for (; p <= port_to; p++, i++) {
+ e.ip = htonl(ip);
+ e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipportip6_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
+
+/* Common functions */
+
+static bool
+hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
+ const struct hash_ipportip6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static bool
+hash_ipportip6_data_list(struct sk_buff *skb,
+ const struct hash_ipportip6_elem *data)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
+ const struct hash_ipportip6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_ipportip6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipportip6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_ipportip_type __read_mostly = {
+ .name = "hash:ip,port,ip",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
+ .dimension = IPSET_DIM_THREE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ipportip_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipportip_init(void)
+{
+ return ip_set_type_register(&hash_ipportip_type);
+}
+
+static void __exit
+hash_ipportip_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_ipportip_type);
+}
+
+module_init(hash_ipportip_init);
+module_exit(hash_ipportip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
new file mode 100644
index 000000000..5c6de605a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+/* 3 nomatch flag support added */
+/* 4 Counters support added */
+/* 5 Comments support added */
+/* 6 Forceadd support added */
+/* 7 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,port,net");
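+
+/* Illustrative userspace usage (ipset(8), not part of this module):
+ *   ipset create foo hash:ip,port,net
+ *   ipset add foo 192.168.1.1,tcp:80,10.0.0.0/24
+ */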
+
+/* Type specific function prefix */
+#define HTYPE hash_ipportnet
+
+/* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0.
+ * However, this way we have to store cidr - 1 internally,
+ * dancing back and forth.
+ */
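+/* Example: a /24 network is stored as cidr = 23 in the element and
+ * reported back as cidr + 1 when the set is listed.
+ */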
+#define IP_SET_HASH_WITH_NETS_PACKED
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipportnet4_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+};
+
+/* Common functions */
+
+static bool
+hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
+ const struct hash_ipportnet4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->ip2 == ip2->ip2 &&
+ ip1->cidr == ip2->cidr &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static int
+hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
+{
+ elem->ip2 &= ip_set_netmask(cidr);
+ elem->cidr = cidr - 1;
+}
+
+static bool
+hash_ipportnet4_data_list(struct sk_buff *skb,
+ const struct hash_ipportnet4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
+ const struct hash_ipportnet4_elem *d)
+{
+ next->ip = d->ip;
+ next->port = d->port;
+ next->ip2 = d->ip2;
+}
+
+#define MTYPE hash_ipportnet4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportnet4_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK - 1;
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+ e.ip2 &= ip_set_netmask(e.cidr + 1);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_ipportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr = cidr - 1;
+ }
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
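+ /* Per-element flags such as nomatch travel in the upper 16 bits of
+  * the flags word; data_set_flags() applies them when the element is
+  * stored.
+  */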
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||
+ tb[IPSET_ATTR_IP2_TO])) {
+ e.ip = htonl(ip);
+ e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ port_to = port = ntohs(e.port);
+ if (tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+ }
+
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+ if (ret)
+ return ret;
+ if (ip2_from > ip2_to)
+ swap(ip2_from, ip2_to);
+ if (ip2_from + UINT_MAX == ip2_to)
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+ }
+
+ if (retried) {
+ ip = ntohl(h->next.ip);
+ p = ntohs(h->next.port);
+ ip2 = ntohl(h->next.ip2);
+ } else {
+ p = port;
+ ip2 = ip2_from;
+ }
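+ /* Walk the IP x port x IP2 cube; the second address range is split
+  * into CIDR blocks by ip_set_range_to_cidr(), and -ERANGE makes the
+  * core retry from the state saved in h->next.
+  */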
+ for (; ip <= ip_to; ip++) {
+ e.ip = htonl(ip);
+ for (; p <= port_to; p++) {
+ e.port = htons(p);
+ do {
+ i++;
+ e.ip2 = htonl(ip2);
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
+ e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
+ }
+ p = port;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipportnet6_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+};
+
+/* Common functions */
+
+static bool
+hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
+ const struct hash_ipportnet6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) &&
+ ip1->cidr == ip2->cidr &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static int
+hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
+{
+ ip6_netmask(&elem->ip2, cidr);
+ elem->cidr = cidr - 1;
+}
+
+static bool
+hash_ipportnet6_data_list(struct sk_buff *skb,
+ const struct hash_ipportnet6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
+ const struct hash_ipportnet6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_ipportnet6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipportnet6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportnet6_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK - 1;
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
+ ip6_netmask(&e.ip2, e.cidr + 1);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipportnet6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr = cidr - 1;
+ }
+
+ ip6_netmask(&e.ip2, e.cidr + 1);
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_ipportnet_type __read_mostly = {
+ .name = "hash:ip,port,net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+ IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_THREE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_ipportnet_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipportnet_init(void)
+{
+ return ip_set_type_register(&hash_ipportnet_type);
+}
+
+static void __exit
+hash_ipportnet_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_ipportnet_type);
+}
+
+module_init(hash_ipportnet_init);
+module_exit(hash_ipportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
new file mode 100644
index 000000000..718814730
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
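+
+/* Illustrative userspace usage (ipset(8), not part of this module):
+ *   ipset create foo hash:mac
+ *   ipset add foo 01:23:45:67:89:ab
+ */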
+
+/* Type specific function prefix */
+#define HTYPE hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+ /* Zero valued MAC addresses cannot be stored; the union pads
+  * the element to a multiple of 4 bytes for hashing.
+  */
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+ const struct hash_mac4_elem *e2,
+ u32 *multi)
+{
+ return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+ if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+ const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE hash_mac4
+#define HOST_MASK 32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
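+ /* Make sure a complete Ethernet header is present in the skb before
+  * dereferencing it below.
+  */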
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
+ if (is_zero_ether_addr(e.ether))
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+ .name = "hash:mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_mac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+ return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
new file mode 100644
index 000000000..ce0a9ce5a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Range as input support for IPv4 added */
+/* 2 nomatch flag support added */
+/* 3 Counters support added */
+/* 4 Comments support added */
+/* 5 Forceadd support added */
+/* 6 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 7 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net");
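+
+/* Illustrative userspace usage (ipset(8), not part of this module):
+ *   ipset create foo hash:net
+ *   ipset add foo 192.168.0.0/24
+ */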
+
+/* Type specific function prefix */
+#define HTYPE hash_net
+#define IP_SET_HASH_WITH_NETS
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_net4_elem {
+ __be32 ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+};
+
+/* Common functions */
+
+static bool
+hash_net4_data_equal(const struct hash_net4_elem *ip1,
+ const struct hash_net4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->cidr == ip2->cidr;
+}
+
+static int
+hash_net4_do_data_match(const struct hash_net4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
+{
+ elem->ip &= ip_set_netmask(cidr);
+ elem->cidr = cidr;
+}
+
+static bool
+hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_net4_data_next(struct hash_net4_elem *next,
+ const struct hash_net4_elem *d)
+{
+ next->ip = d->ip;
+}
+
+#define MTYPE hash_net4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_net4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_net4_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
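+ /* cidr == 0 here means the set holds no networks yet: nothing can
+  * match.
+  */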
+ if (e.cidr == 0)
+ return -EINVAL;
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_net4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_net4_elem e = { .cidr = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr || e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip_to < ip)
+ swap(ip, ip_to);
+ if (ip + UINT_MAX == ip_to)
+ return -IPSET_ERR_HASH_RANGE;
+ }
+
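+ /* Split the ip..ip_to range into CIDR blocks, e.g. 10.0.0.0 -
+  * 10.0.0.5 becomes 10.0.0.0/30 plus 10.0.0.4/31.
+  */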
+ if (retried)
+ ip = ntohl(h->next.ip);
+ do {
+ i++;
+ e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ } while (ip++ < ip_to);
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_net6_elem {
+ union nf_inet_addr ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+};
+
+/* Common functions */
+
+static bool
+hash_net6_data_equal(const struct hash_net6_elem *ip1,
+ const struct hash_net6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->cidr == ip2->cidr;
+}
+
+static int
+hash_net6_do_data_match(const struct hash_net6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
+{
+ ip6_netmask(&elem->ip, cidr);
+ elem->cidr = cidr;
+}
+
+static bool
+hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_net6_data_next(struct hash_net6_elem *next,
+ const struct hash_net6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_net6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_net6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_net6_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (e.cidr == 0)
+ return -EINVAL;
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_net6_elem e = { .cidr = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr || e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ip6_netmask(&e.ip, e.cidr);
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_net_type __read_mostly = {
+ .name = "hash:net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_net_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_net_init(void)
+{
+ return ip_set_type_register(&hash_net_type);
+}
+
+static void __exit
+hash_net_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_net_type);
+}
+
+module_init(hash_net_init);
+module_exit(hash_net_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
new file mode 100644
index 000000000..bf1a3851b
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:net,iface type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 nomatch flag support added */
+/* 2 /0 support added */
+/* 3 Counters support added */
+/* 4 Comments support added */
+/* 5 Forceadd support added */
+/* 6 skbinfo support added */
+/* 7 interface wildcard support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,iface");
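+
+/* Illustrative userspace usage (ipset(8), not part of this module):
+ *   ipset create foo hash:net,iface
+ *   ipset add foo 192.168.0.0/24,eth0
+ */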
+
+/* Type specific function prefix */
+#define HTYPE hash_netiface
+#define IP_SET_HASH_WITH_NETS
+#define IP_SET_HASH_WITH_MULTI
+#define IP_SET_HASH_WITH_NET0
+
+#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
+
+/* IPv4 variant */
+
+struct hash_netiface4_elem_hashed {
+ __be32 ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+};
+
+/* Member elements */
+struct hash_netiface4_elem {
+ __be32 ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ u8 wildcard;
+ char iface[IFNAMSIZ];
+};
+
+/* Common functions */
+
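+/* With the wildcard flag set, the stored interface name acts as a
+ * prefix: "eth" matches eth0, eth1, ... (strncmp() over the stored
+ * name length in the data_equal() callbacks below).
+ */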
+static bool
+hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
+ const struct hash_netiface4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->cidr == ip2->cidr &&
+ (++*multi) &&
+ ip1->physdev == ip2->physdev &&
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
+}
+
+static int
+hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
+{
+ elem->ip &= ip_set_netmask(cidr);
+ elem->cidr = cidr;
+}
+
+static bool
+hash_netiface4_data_list(struct sk_buff *skb,
+ const struct hash_netiface4_elem *data)
+{
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
+
+ if (data->nomatch)
+ flags |= IPSET_FLAG_NOMATCH;
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
+ nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netiface4_data_next(struct hash_netiface4_elem *next,
+ const struct hash_netiface4_elem *d)
+{
+ next->ip = d->ip;
+}
+
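+/* Only the leading hash_netiface4_elem_hashed part of the element is
+ * hashed (HKEY_DATALEN); the interface name is compared separately in
+ * the data_equal() callback.
+ */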
+#define MTYPE hash_netiface4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
+#include "ip_set_hash_gen.h"
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb, struct net *net)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb, net);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_physoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
+static int
+hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct hash_netiface4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netiface4_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .elem = 1,
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr);
+
+#define IFACE(dir) (par->state->dir ? par->state->dir->name : "")
+#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
+
+ if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
+ get_physoutdev_name(skb);
+
+ if (!eiface)
+ return -EINVAL;
+ STRLCPY(e.iface, eiface);
+ e.physdev = 1;
+#endif
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
+
+ if (strlen(e.iface) == 0)
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_netiface4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_IFACE] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+ nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_PHYSDEV)
+ e.physdev = 1;
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
+ }
+ if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip_to < ip)
+ swap(ip, ip_to);
+ if (ip + UINT_MAX == ip_to)
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr);
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ do {
+ i++;
+ e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ } while (ip++ < ip_to);
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_netiface6_elem_hashed {
+ union nf_inet_addr ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+};
+
+struct hash_netiface6_elem {
+ union nf_inet_addr ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ u8 wildcard;
+ char iface[IFNAMSIZ];
+};
+
+/* Common functions */
+
+static bool
+hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
+ const struct hash_netiface6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->cidr == ip2->cidr &&
+ (++*multi) &&
+ ip1->physdev == ip2->physdev &&
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
+}
+
+static int
+hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
+{
+ ip6_netmask(&elem->ip, cidr);
+ elem->cidr = cidr;
+}
+
+static bool
+hash_netiface6_data_list(struct sk_buff *skb,
+ const struct hash_netiface6_elem *data)
+{
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
+
+ if (data->nomatch)
+ flags |= IPSET_FLAG_NOMATCH;
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
+ nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netiface6_data_next(struct hash_netiface6_elem *next,
+ const struct hash_netiface6_elem *d)
+{
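+ /* IPv6 does not support ranges, so there is no next element
+ * to record for a retry */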
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_netiface6
+#define HOST_MASK 128
+#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct hash_netiface6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netiface6_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .elem = 1,
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr);
+
+ if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
+ get_physoutdev_name(skb);
+
+ if (!eiface)
+ return -EINVAL;
+ STRLCPY(e.iface, eiface);
+ e.physdev = 1;
+#endif
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
+
+ if (strlen(e.iface) == 0)
+ return -EINVAL;
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !tb[IPSET_ATTR_IFACE] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ip6_netmask(&e.ip, e.cidr);
+
+ nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_PHYSDEV)
+ e.physdev = 1;
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_netiface_type __read_mostly = {
+ .name = "hash:net,iface",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE |
+ IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_netiface_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ - 1 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netiface_init(void)
+{
+ return ip_set_type_register(&hash_netiface_type);
+}
+
+static void __exit
+hash_netiface_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_netiface_type);
+}
+
+module_init(hash_netiface_init);
+module_exit(hash_netiface_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
new file mode 100644
index 000000000..c07b70bf3
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
+ * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
+ */
+
+/* Kernel module implementing an IP set type: the hash:net,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Forceadd support added */
+/* 2 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,net");
+
+/* Type specific function prefix */
+#define HTYPE hash_netnet
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+
+/* IPv4 variants */
+
+/* Member elements */
+struct hash_netnet4_elem {
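+ /* The two addresses overlay ipcmp, so data_equal can compare
+ * both with a single 64 bit operation */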
+ union {
+ __be32 ip[2];
+ __be64 ipcmp;
+ };
+ u8 nomatch;
+ u8 padding;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+};
+
+/* Common functions */
+
+static bool
+hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
+ const struct hash_netnet4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ipcmp == ip2->ipcmp &&
+ ip1->ccmp == ip2->ccmp;
+}
+
+static int
+hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
+ struct hash_netnet4_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static void
+hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
+{
+ if (inner) {
+ elem->ip[1] &= ip_set_netmask(cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ elem->ip[0] &= ip_set_netmask(cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netnet4_data_list(struct sk_buff *skb,
+ const struct hash_netnet4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netnet4_data_next(struct hash_netnet4_elem *next,
+ const struct hash_netnet4_elem *d)
+{
+ next->ipcmp = d->ipcmp;
+}
+
+#define MTYPE hash_netnet4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static void
+hash_netnet4_init(struct hash_netnet4_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
+static int
+hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
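+ /* A TEST matches the exact addresses: ccmp sets both packed
+ * cidr bytes to HOST_MASK in one store */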
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]);
+ e.ip[0] &= ip_set_netmask(e.cidr[0]);
+ e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_netnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ hash_netnet4_init(&e);
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] ||
+ tb[IPSET_ATTR_IP2_TO])) {
+ e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+ e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip_to < ip)
+ swap(ip, ip_to);
+ if (unlikely(ip + UINT_MAX == ip_to))
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
+
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+ if (ret)
+ return ret;
+ if (ip2_to < ip2_from)
+ swap(ip2_from, ip2_to);
+ if (unlikely(ip2_from + UINT_MAX == ip2_to))
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
+
+ if (retried) {
+ ip = ntohl(h->next.ip[0]);
+ ip2 = ntohl(h->next.ip[1]);
+ } else {
+ ip2 = ip2_from;
+ }
+
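+ /* The outer loop walks the first address range, the inner one the
+ * second; both are carved into maximal CIDR blocks */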
+ do {
+ e.ip[0] = htonl(ip);
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ do {
+ i++;
+ e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
+ } while (ip++ < ip_to);
+ return ret;
+}
+
+/* IPv6 variants */
+
+struct hash_netnet6_elem {
+ union nf_inet_addr ip[2];
+ u8 nomatch;
+ u8 padding;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+};
+
+/* Common functions */
+
+static bool
+hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
+ const struct hash_netnet6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+ ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+ ip1->ccmp == ip2->ccmp;
+}
+
+static int
+hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static void
+hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
+ struct hash_netnet6_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static void
+hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
+{
+ if (inner) {
+ ip6_netmask(&elem->ip[1], cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ ip6_netmask(&elem->ip[0], cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netnet6_data_list(struct sk_buff *skb,
+ const struct hash_netnet6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netnet6_data_next(struct hash_netnet6_elem *next,
+ const struct hash_netnet6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_netnet6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static void
+hash_netnet6_init(struct hash_netnet6_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
+static int
+hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netnet6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ hash_netnet6_init(&e);
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_netnet_type __read_mostly = {
+ .name = "hash:net,net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_netnet_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netnet_init(void)
+{
+ return ip_set_type_register(&hash_netnet_type);
+}
+
+static void __exit
+hash_netnet_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_netnet_type);
+}
+
+module_init(hash_netnet_init);
+module_exit(hash_netnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
new file mode 100644
index 000000000..d1a0628df
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:net,port type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+/* 3 nomatch flag support added */
+/* 4 Counters support added */
+/* 5 Comments support added */
+/* 6 Forceadd support added */
+/* 7 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,port");
+
+/* Type specific function prefix */
+#define HTYPE hash_netport
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+/* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0.
+ * The price is that we have to store cidr - 1 internally and convert
+ * back and forth on add, test and list.
+ */
+#define IP_SET_HASH_WITH_NETS_PACKED
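+/* Example: adding 192.168.0.0/24 stores cidr = 23 in the 7 bit field;
+ * listing adds 1 back (see hash_netport4_data_list), so userspace still
+ * sees /24. The bit freed this way holds the nomatch flag.
+ */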
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_netport4_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+};
+
+/* Common functions */
+
+static bool
+hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
+ const struct hash_netport4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto &&
+ ip1->cidr == ip2->cidr;
+}
+
+static int
+hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
+{
+ elem->ip &= ip_set_netmask(cidr);
+ elem->cidr = cidr - 1;
+}
+
+static bool
+hash_netport4_data_list(struct sk_buff *skb,
+ const struct hash_netport4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netport4_data_next(struct hash_netport4_elem *next,
+ const struct hash_netport4_elem *d)
+{
+ next->ip = d->ip;
+ next->port = d->port;
+}
+
+#define MTYPE hash_netport4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netport4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netport4_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK - 1;
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr + 1);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_netport4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr = cidr - 1;
+ }
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
+ with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = port_to = ntohs(e.port);
+ if (tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port_to < port)
+ swap(port, port_to);
+ }
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip_to < ip)
+ swap(ip, ip_to);
+ if (ip + UINT_MAX == ip_to)
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+ }
+
+ if (retried) {
+ ip = ntohl(h->next.ip);
+ p = ntohs(h->next.port);
+ } else {
+ p = port;
+ }
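+ /* For each CIDR block of the address range, try every port
+ * in [port, port_to] */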
+ do {
+ e.ip = htonl(ip);
+ ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
+ e.cidr = cidr - 1;
+ for (; p <= port_to; p++, i++) {
+ e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ p = port;
+ } while (ip++ < ip_to);
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_netport6_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+};
+
+/* Common functions */
+
+static bool
+hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
+ const struct hash_netport6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto &&
+ ip1->cidr == ip2->cidr;
+}
+
+static int
+hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
+{
+ ip6_netmask(&elem->ip, cidr);
+ elem->cidr = cidr - 1;
+}
+
+static bool
+hash_netport6_data_list(struct sk_buff *skb,
+ const struct hash_netport6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netport6_data_next(struct hash_netport6_elem *next,
+ const struct hash_netport6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_netport6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netport6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netport6_elem e = {
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.cidr = HOST_MASK - 1;
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr + 1);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_netport6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr = cidr - 1;
+ }
+ ip6_netmask(&e.ip, e.cidr + 1);
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_netport_type __read_mostly = {
+ .name = "hash:net,port",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_netport_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netport_init(void)
+{
+ return ip_set_type_register(&hash_netport_type);
+}
+
+static void __exit
+hash_netport_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_netport_type);
+}
+
+module_init(hash_netport_init);
+module_exit(hash_netport_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
new file mode 100644
index 000000000..bf4f91b78
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the hash:net,port,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 0 Comments support added */
+/* 1 Forceadd support added */
+/* 2 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,port,net");
+
+/* Type specific function prefix */
+#define HTYPE hash_netportnet
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_netportnet4_elem {
+ union {
+ __be32 ip[2];
+ __be64 ipcmp;
+ };
+ __be16 port;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+ u16 padding;
+ u8 nomatch;
+ u8 proto;
+};
+
+/* Common functions */
+
+static bool
+hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
+ const struct hash_netportnet4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ipcmp == ip2->ipcmp &&
+ ip1->ccmp == ip2->ccmp &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static int
+hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
+ struct hash_netportnet4_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static void
+hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
+ u8 cidr, bool inner)
+{
+ if (inner) {
+ elem->ip[1] &= ip_set_netmask(cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ elem->ip[0] &= ip_set_netmask(cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netportnet4_data_list(struct sk_buff *skb,
+ const struct hash_netportnet4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
+ const struct hash_netportnet4_elem *d)
+{
+ next->ipcmp = d->ipcmp;
+ next->port = d->port;
+}
+
+#define MTYPE hash_netportnet4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static void
+hash_netportnet4_init(struct hash_netportnet4_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
+static int
+hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]);
+ e.ip[0] &= ip_set_netmask(e.cidr[0]);
+ e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
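+/* Like ip_set_range_to_cidr(), but maps the full address range to cidr 0,
+ * which this type accepts thanks to IP_SET_HASH_WITH_NET0 above.
+ */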
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
+static int
+hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct hash_netportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ hash_netportnet4_init(&e);
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) {
+ e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+ e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ if (unlikely(ip + UINT_MAX == ip_to))
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
+
+ port_to = port = ntohs(e.port);
+ if (tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+ }
+
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+ if (ret)
+ return ret;
+ if (ip2_from > ip2_to)
+ swap(ip2_from, ip2_to);
+ if (unlikely(ip2_from + UINT_MAX == ip2_to))
+ return -IPSET_ERR_HASH_RANGE;
+ } else {
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
+
+ if (retried) {
+ ip = ntohl(h->next.ip[0]);
+ p = ntohs(h->next.port);
+ ip2 = ntohl(h->next.ip[1]);
+ } else {
+ p = port;
+ ip2 = ip2_from;
+ }
+
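+ /* Three nested dimensions: the first address range, then the port
+ * range, then the second address range */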
+ do {
+ e.ip[0] = htonl(ip);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ for (; p <= port_to; p++) {
+ e.port = htons(p);
+ do {
+ i++;
+ e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
+ }
+ p = port;
+ } while (ip++ < ip_to);
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_netportnet6_elem {
+ union nf_inet_addr ip[2];
+ __be16 port;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+ u16 padding;
+ u8 nomatch;
+ u8 proto;
+};
+
+/* Common functions */
+
+static bool
+hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
+ const struct hash_netportnet6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+ ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+ ip1->ccmp == ip2->ccmp &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static int
+hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static void
+hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static void
+hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static void
+hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
+ struct hash_netportnet6_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static void
+hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
+ u8 cidr, bool inner)
+{
+ if (inner) {
+ ip6_netmask(&elem->ip[1], cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ ip6_netmask(&elem->ip[0], cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netportnet6_data_list(struct sk_buff *skb,
+ const struct hash_netportnet6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static void
+hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
+ const struct hash_netportnet6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef HOST_MASK
+
+#define MTYPE hash_netportnet6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static void
+hash_netportnet6_init(struct hash_netportnet6_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
+static int
+hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netportnet6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6);
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_netportnet6 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ hash_netportnet6_init(&e);
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else {
+ return -IPSET_ERR_MISSING_PROTO;
+ }
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_netportnet_type __read_mostly = {
+ .name = "hash:net,port,net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+ IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_THREE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
+ .create = hash_netportnet_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netportnet_init(void)
+{
+ return ip_set_type_register(&hash_netportnet_type);
+}
+
+static void __exit
+hash_netportnet_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&hash_netportnet_type);
+}
+
+module_init(hash_netportnet_init);
+module_exit(hash_netportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
new file mode 100644
index 000000000..5a67f7966
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
+
+/* Kernel module implementing an IP set type: the list:set type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_list.h>
+
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counters support added */
+/* 2 Comments support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_list:set");
+
+/* Member elements */
+struct set_elem {
+ struct rcu_head rcu;
+ struct list_head list;
+ struct ip_set *set; /* Sigh, needed in order to clean up the reference */
+ ip_set_id_t id;
+} __aligned(__alignof__(u64));
+
+struct set_adt_elem {
+ ip_set_id_t id;
+ ip_set_id_t refid;
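+ /* >0: add before refid, <0: add after refid, 0: append */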
+ int before;
+};
+
+/* Type structure */
+struct list_set {
+ u32 size; /* size of set list array */
+ struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
+ struct net *net; /* namespace */
+ struct list_head members; /* the set members */
+};
+
+static int
+list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
+{
+ struct list_set *map = set->data;
+ struct ip_set_ext *mext = &opt->ext;
+ struct set_elem *e;
+ u32 flags = opt->cmdflags;
+ int ret;
+
+ /* Don't lookup sub-counters at all */
+ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
+ if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
+ opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE;
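+ /* An entry matches only if both the child set test and this set's
+ * own extension checks succeed */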
+ list_for_each_entry_rcu(e, &map->members, list) {
+ ret = ip_set_test(e->id, skb, par, opt);
+ if (ret <= 0)
+ continue;
+ if (ip_set_match_extensions(set, ext, mext, flags, e))
+ return 1;
+ }
+ return 0;
+}
+
+static int
+list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
+{
+ struct list_set *map = set->data;
+ struct set_elem *e;
+ int ret;
+
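+ /* Propagate the add to the first child set that accepts the element */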
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
+ continue;
+ ret = ip_set_add(e->id, skb, par, opt);
+ if (ret == 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
+{
+ struct list_set *map = set->data;
+ struct set_elem *e;
+ int ret;
+
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
+ continue;
+ ret = ip_set_del(e->id, skb, par, opt);
+ if (ret == 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+ switch (adt) {
+ case IPSET_TEST:
+ ret = list_set_ktest(set, skb, par, opt, &ext);
+ break;
+ case IPSET_ADD:
+ ret = list_set_kadd(set, skb, par, opt, &ext);
+ break;
+ case IPSET_DEL:
+ ret = list_set_kdel(set, skb, par, opt, &ext);
+ break;
+ default:
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* Userspace interfaces: we are protected by the nfnl mutex */
+
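+/* Kernel-side lookups traverse the member list under RCU, so freeing
+ * an element is deferred until after a grace period.
+ */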
+static void
+__list_set_del_rcu(struct rcu_head *rcu)
+{
+ struct set_elem *e = container_of(rcu, struct set_elem, rcu);
+ struct ip_set *set = e->set;
+
+ ip_set_ext_destroy(set, e);
+ kfree(e);
+}
+
+static void
+list_set_del(struct ip_set *set, struct set_elem *e)
+{
+ struct list_set *map = set->data;
+
+ set->elements--;
+ list_del_rcu(&e->list);
+ ip_set_put_byindex(map->net, e->id);
+ call_rcu(&e->rcu, __list_set_del_rcu);
+}
+
+static void
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
+{
+ struct list_set *map = set->data;
+
+ list_replace_rcu(&old->list, &e->list);
+ ip_set_put_byindex(map->net, old->id);
+ call_rcu(&old->rcu, __list_set_del_rcu);
+}
+
+static void
+set_cleanup_entries(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+ struct set_elem *e, *n;
+
+ list_for_each_entry_safe(e, n, &map->members, list)
+ if (ip_set_timeout_expired(ext_timeout(e, set)))
+ list_set_del(set, e);
+}
+
+static int
+list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e, *next, *prev = NULL;
+ int ret;
+
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
+ continue;
+ else if (e->id != d->id) {
+ prev = e;
+ continue;
+ }
+
+ if (d->before == 0) {
+ ret = 1;
+ } else if (d->before > 0) {
+ next = list_next_entry(e, list);
+ ret = !list_is_last(&e->list, &map->members) &&
+ next->id == d->refid;
+ } else {
+ ret = prev && prev->id == d->refid;
+ }
+ return ret;
+ }
+ return 0;
+}
+
+static void
+list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct set_elem *e)
+{
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(e, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(set, ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
+ /* Update timeout last */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
+}
+
+static int
+list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e, *n, *prev, *next;
+ bool flag_exist = flags & IPSET_FLAG_EXIST;
+
+ /* Find where to add the new entry */
+ n = prev = next = NULL;
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
+ continue;
+ else if (d->id == e->id)
+ n = e;
+ else if (d->before == 0 || e->id != d->refid)
+ continue;
+ else if (d->before > 0)
+ next = e;
+ else
+ prev = e;
+ }
+
+ /* The reference entry was not found: before/after cannot be
+ * satisfied (e.g. when used on an empty set) */
+ if ((d->before > 0 && !next) ||
+ (d->before < 0 && !prev))
+ return -IPSET_ERR_REF_EXIST;
+
+ /* Re-add already existing element */
+ if (n) {
+ if (!flag_exist)
+ return -IPSET_ERR_EXIST;
+ /* Update extensions */
+ ip_set_ext_destroy(set, n);
+ list_set_init_extensions(set, ext, n);
+
+ /* Set is already added to the list */
+ ip_set_put_byindex(map->net, d->id);
+ return 0;
+ }
+ /* Add new entry */
+ if (d->before == 0) {
+ /* Append */
+ n = list_empty(&map->members) ? NULL :
+ list_last_entry(&map->members, struct set_elem, list);
+ } else if (d->before > 0) {
+ /* Insert after next element */
+ if (!list_is_last(&next->list, &map->members))
+ n = list_next_entry(next, list);
+ } else {
+ /* Insert before prev element */
+ if (prev->list.prev != &map->members)
+ n = list_prev_entry(prev, list);
+ }
+ /* Can we replace a timed out entry? */
+ if (n &&
+ !(SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(n, set))))
+ n = NULL;
+
+ e = kzalloc(set->dsize, GFP_ATOMIC);
+ if (!e)
+ return -ENOMEM;
+ e->id = d->id;
+ e->set = set;
+ INIT_LIST_HEAD(&e->list);
+ list_set_init_extensions(set, ext, e);
+ if (n)
+ list_set_replace(set, e, n);
+ else if (next)
+ list_add_tail_rcu(&e->list, &next->list);
+ else if (prev)
+ list_add_rcu(&e->list, &prev->list);
+ else
+ list_add_tail_rcu(&e->list, &map->members);
+ set->elements++;
+
+ return 0;
+}
+
+static int
+list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e, *next, *prev = NULL;
+
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
+ continue;
+ else if (e->id != d->id) {
+ prev = e;
+ continue;
+ }
+
+ if (d->before > 0) {
+ next = list_next_entry(e, list);
+ if (list_is_last(&e->list, &map->members) ||
+ next->id != d->refid)
+ return -IPSET_ERR_REF_EXIST;
+ } else if (d->before < 0) {
+ if (!prev || prev->id != d->refid)
+ return -IPSET_ERR_REF_EXIST;
+ }
+ list_set_del(set, e);
+ return 0;
+ }
+ return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST;
+}
+
+static int
+list_set_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ struct list_set *map = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ struct ip_set *s;
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ if (unlikely(!tb[IPSET_ATTR_NAME] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
+ if (e.id == IPSET_INVALID_ID)
+ return -IPSET_ERR_NAME;
+ /* "Loop detection" */
+ if (s->type->features & IPSET_TYPE_NAME) {
+ ret = -IPSET_ERR_LOOP;
+ goto finish;
+ }
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
+ e.before = f & IPSET_FLAG_BEFORE;
+ }
+
+ if (e.before && !tb[IPSET_ATTR_NAMEREF]) {
+ ret = -IPSET_ERR_BEFORE;
+ goto finish;
+ }
+
+ if (tb[IPSET_ATTR_NAMEREF]) {
+ e.refid = ip_set_get_byname(map->net,
+ nla_data(tb[IPSET_ATTR_NAMEREF]),
+ &s);
+ if (e.refid == IPSET_INVALID_ID) {
+ ret = -IPSET_ERR_NAMEREF;
+ goto finish;
+ }
+ if (!e.before)
+ e.before = -1;
+ }
+ if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set))
+ set_cleanup_entries(set);
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+finish:
+ if (e.refid != IPSET_INVALID_ID)
+ ip_set_put_byindex(map->net, e.refid);
+ if (adt != IPSET_ADD || ret)
+ ip_set_put_byindex(map->net, e.id);
+
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static void
+list_set_flush(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+ struct set_elem *e, *n;
+
+ list_for_each_entry_safe(e, n, &map->members, list)
+ list_set_del(set, e);
+ set->elements = 0;
+ set->ext_size = 0;
+}
+
+static void
+list_set_destroy(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+ struct set_elem *e, *n;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+
+ list_for_each_entry_safe(e, n, &map->members, list) {
+ list_del(&e->list);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
+ kfree(e);
+ }
+ kfree(map);
+
+ set->data = NULL;
+}
+
+/* Calculate the actual memory size of the set data */
+static size_t
+list_set_memsize(const struct list_set *map, size_t dsize)
+{
+ struct set_elem *e;
+ u32 n = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &map->members, list)
+ n++;
+ rcu_read_unlock();
+
+ return (sizeof(*map) + n * dsize);
+}
+
+static int
+list_set_head(struct ip_set *set, struct sk_buff *skb)
+{
+ const struct list_set *map = set->data;
+ struct nlattr *nested;
+ size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
+
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
+ nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+
+ return 0;
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int
+list_set_list(const struct ip_set *set,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct list_set *map = set->data;
+ struct nlattr *atd, *nested;
+ u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+ char name[IPSET_MAXNAMELEN];
+ struct set_elem *e;
+ int ret = 0;
+
+ atd = nla_nest_start(skb, IPSET_ATTR_ADT);
+ if (!atd)
+ return -EMSGSIZE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &map->members, list) {
+ if (i < first ||
+ (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))) {
+ i++;
+ continue;
+ }
+ nested = nla_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ ip_set_name_byindex(map->net, e->id, name);
+ if (nla_put_string(skb, IPSET_ATTR_NAME, name))
+ goto nla_put_failure;
+ if (ip_set_put_extensions(skb, set, e, true))
+ goto nla_put_failure;
+ nla_nest_end(skb, nested);
+ i++;
+ }
+
+ nla_nest_end(skb, atd);
+ /* Set listing finished */
+ cb->args[IPSET_CB_ARG0] = 0;
+ goto out;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nested);
+ if (unlikely(i == first)) {
+ nla_nest_cancel(skb, atd);
+ cb->args[IPSET_CB_ARG0] = 0;
+ ret = -EMSGSIZE;
+ } else {
+ cb->args[IPSET_CB_ARG0] = i;
+ nla_nest_end(skb, atd);
+ }
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static bool
+list_set_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+ const struct list_set *x = a->data;
+ const struct list_set *y = b->data;
+
+ return x->size == y->size &&
+ a->timeout == b->timeout &&
+ a->extensions == b->extensions;
+}
+
+static const struct ip_set_type_variant set_variant = {
+ .kadt = list_set_kadt,
+ .uadt = list_set_uadt,
+ .adt = {
+ [IPSET_ADD] = list_set_uadd,
+ [IPSET_DEL] = list_set_udel,
+ [IPSET_TEST] = list_set_utest,
+ },
+ .destroy = list_set_destroy,
+ .flush = list_set_flush,
+ .head = list_set_head,
+ .list = list_set_list,
+ .same_set = list_set_same_set,
+};
+
+static void
+list_set_gc(struct timer_list *t)
+{
+ struct list_set *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
+
+ spin_lock_bh(&set->lock);
+ set_cleanup_entries(set);
+ spin_unlock_bh(&set->lock);
+
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
+ add_timer(&map->gc);
+}
+
+static void
+list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
+{
+ struct list_set *map = set->data;
+
+ timer_setup(&map->gc, gc, 0);
+ mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
+}
+
+/* Create list:set type of sets */
+
+static bool
+init_list_set(struct net *net, struct ip_set *set, u32 size)
+{
+ struct list_set *map;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return false;
+
+ map->size = size;
+ map->net = net;
+ map->set = set;
+ INIT_LIST_HEAD(&map->members);
+ set->data = map;
+
+ return true;
+}
+
+static int
+list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
+{
+ u32 size = IP_SET_LIST_DEFAULT_SIZE;
+
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_SIZE])
+ size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]);
+ if (size < IP_SET_LIST_MIN_SIZE)
+ size = IP_SET_LIST_MIN_SIZE;
+
+ set->variant = &set_variant;
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+ __alignof__(struct set_elem));
+ if (!init_list_set(net, set, size))
+ return -ENOMEM;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ list_set_gc_init(set, list_set_gc);
+ }
+ return 0;
+}
+
+static struct ip_set_type list_set_type __read_mostly = {
+ .name = "list:set",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = list_set_create,
+ .create_policy = {
+ [IPSET_ATTR_SIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_NAME] = { .type = NLA_STRING,
+ .len = IPSET_MAXNAMELEN },
+ [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING,
+ .len = IPSET_MAXNAMELEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+list_set_init(void)
+{
+ return ip_set_type_register(&list_set_type);
+}
+
+static void __exit
+list_set_fini(void)
+{
+ rcu_barrier();
+ ip_set_type_unregister(&list_set_type);
+}
+
+module_init(list_set_init);
+module_exit(list_set_fini);
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
new file mode 100644
index 000000000..ff570bff9
--- /dev/null
+++ b/net/netfilter/ipset/pfxlen.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/export.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+
+/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
+
+#ifdef E
+#undef E
+#endif
+
+#define PREFIXES_MAP \
+ E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+
+#define E(a, b, c, d) \
+ {.ip6 = { \
+ htonl(a), htonl(b), \
+ htonl(c), htonl(d), \
+ } }
+
+/* This table works for both IPv4 and IPv6;
+ * just use ip_set_netmask_map[prefixlength].ip.
+ */
+const union nf_inet_addr ip_set_netmask_map[] = {
+ PREFIXES_MAP
+};
+EXPORT_SYMBOL_GPL(ip_set_netmask_map);
+
+#undef E
+#define E(a, b, c, d) \
+ {.ip6 = { (__force __be32)a, (__force __be32)b, \
+ (__force __be32)c, (__force __be32)d, \
+ } }
+
+/* This table works for both IPv4 and IPv6;
+ * just use ip_set_hostmask_map[prefixlength].ip.
+ */
+const union nf_inet_addr ip_set_hostmask_map[] = {
+ PREFIXES_MAP
+};
+EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
+
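[Editor's note] Each table entry above is just a prefix length expanded into a mask; the tables trade a little memory for avoiding shifts in the fast path. A userspace sketch of the equivalent IPv4 computation (hypothetical helper name):

    /* prefix_to_mask() mirrors one IPv4 table entry: host byte order,
     * with the prefix == 0 case guarded to avoid an undefined shift. */
    #include <stdint.h>

    static uint32_t prefix_to_mask(unsigned int prefix)
    {
        return prefix ? ~0u << (32 - prefix) : 0;
    }

    /* e.g. prefix_to_mask(24) == 0xFFFFFF00 (255.255.255.0); htonl() of
     * this value matches the corresponding ip_set_netmask_map entry. */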
+/* Find the largest network that starts at the left end of the range, in host order. */
+u32
+ip_set_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ u32 last;
+ u8 i;
+
+ for (i = 1; i < 32; i++) {
+ if ((from & ip_set_hostmask(i)) != from)
+ continue;
+ last = from | ~ip_set_hostmask(i);
+ if (!after(last, to)) {
+ *cidr = i;
+ return last;
+ }
+ }
+ *cidr = 32;
+ return from;
+}
+EXPORT_SYMBOL_GPL(ip_set_range_to_cidr);
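[Editor's note] A typical caller decomposes an arbitrary range into CIDR blocks by repeatedly peeling off the block returned for "from" and restarting at last + 1. A self-contained userspace sketch of that loop (plain <= stands in for the wraparound-aware after(); names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t netmask(uint8_t cidr)           /* host byte order */
    {
        return cidr ? ~0u << (32 - cidr) : 0;
    }

    /* Same algorithm as ip_set_range_to_cidr() above */
    static uint32_t range_to_cidr(uint32_t from, uint32_t to, uint8_t *cidr)
    {
        for (uint8_t i = 1; i < 32; i++) {
            if ((from & netmask(i)) != from)
                continue;                           /* from not on a /i boundary */
            uint32_t last = from | ~netmask(i);     /* last address of the /i block */
            if (last <= to) {
                *cidr = i;
                return last;
            }
        }
        *cidr = 32;
        return from;
    }

    int main(void)
    {
        uint32_t from = 0xC0A80000, to = 0xC0A802FF;  /* 192.168.0.0 - 192.168.2.255 */
        uint8_t cidr;

        for (;;) {                                    /* prints /23 then /24 */
            uint32_t last = range_to_cidr(from, to, &cidr);
            printf("0x%08X/%u\n", (unsigned)from, (unsigned)cidr);
            if (last >= to)
                break;
            from = last + 1;
        }
        return 0;
    }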
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
new file mode 100644
index 000000000..2a3017b9c
--- /dev/null
+++ b/net/netfilter/ipvs/Kconfig
@@ -0,0 +1,352 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# IP Virtual Server configuration
+#
+menuconfig IP_VS
+ tristate "IP virtual server support"
+ depends on INET && NETFILTER
+ depends on (NF_CONNTRACK || NF_CONNTRACK=n)
+ help
+ IP Virtual Server support will let you build a high-performance
+ virtual server based on a cluster of two or more real servers. This
+ option must be enabled for at least one of the clustered computers
+ that will take care of intercepting incoming connections to a
+ single IP address and scheduling them to real servers.
+
+ Three request dispatching techniques are implemented: virtual
+ server via NAT, virtual server via tunneling and virtual server
+ via direct routing. Several scheduling algorithms can be used to
+ choose which server a connection is directed to, so that load
+ balancing can be achieved among the servers. For more information
+ and the administration program, please visit the following URL:
+ <http://www.linuxvirtualserver.org/>.
+
+ If you want to compile it into the kernel, say Y. To compile it as
+ a module, choose M here. If unsure, say N.
+
+if IP_VS
+
+config IP_VS_IPV6
+ bool "IPv6 support for IPVS"
+ depends on IPV6 = y || IP_VS = IPV6
+ select NF_DEFRAG_IPV6
+ help
+ Add IPv6 support to IPVS.
+
+ Say Y if unsure.
+
+config IP_VS_DEBUG
+ bool "IP virtual server debugging"
+ help
+ Say Y here if you want to get additional messages useful in
+ debugging the IP virtual server code. You can change the debug
+ level in /proc/sys/net/ipv4/vs/debug_level.
+
+config IP_VS_TAB_BITS
+ int "IPVS connection table size (the Nth power of 2)"
+ range 8 20 if !64BIT
+ range 8 27 if 64BIT
+ default 12
+ help
+ The IPVS connection hash table uses the chaining scheme to handle
+ hash collisions. Using a big IPVS connection hash table will greatly
+ reduce conflicts when there are hundreds of thousands of connections
+ in the hash table.
+
+ Note that the table size must be a power of 2: the table size will
+ be 2 raised to the number you enter. Valid values are from 8 to 27
+ on 64-bit systems (8 to 20 otherwise); the default is 12, which
+ means a table size of 4096. Don't choose a number that is too
+ small, or performance will suffer. You can adapt the table size
+ yourself, according to your virtual server application. It is good
+ to set the table size to not far less than the number of connections
+ per second multiplied by the average lifetime of a connection in the
+ table. For example, if your virtual server gets 200 connections per
+ second and a connection stays in the table for 200 seconds on
+ average, the table size should be not far less than 200x200; a good
+ choice is 32768 (2**15).
+
+ Note also that each connection effectively occupies 128 bytes and
+ each hash entry uses 8 bytes, so you can estimate how much memory
+ is needed for your box.
+
+ You can override this value by setting the conn_tab_bits module
+ parameter, or by appending ip_vs.conn_tab_bits=? to the kernel
+ command line if IPVS was compiled built-in.
+
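[Editor's note] As a rough illustration of the sizing advice above, a sketch that plugs the help text's own figures (128 bytes per connection, 8 bytes per hash entry, 200 conn/s lasting 200 s) into a memory estimate, giving about 5 MiB for a 2**15 table:

    #include <stdio.h>

    int main(void)
    {
        unsigned int bits = 15;                 /* conn_tab_bits */
        unsigned long buckets = 1UL << bits;    /* 32768 hash heads */
        unsigned long conns = 200UL * 200;      /* conn/s * avg lifetime */
        unsigned long bytes = buckets * 8 + conns * 128;

        printf("~%lu KiB for %lu buckets, %lu connections\n",
               bytes / 1024, buckets, conns);
        return 0;
    }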
+comment "IPVS transport protocol load balancing support"
+
+config IP_VS_PROTO_TCP
+ bool "TCP load balancing support"
+ help
+ This option enables support for load balancing TCP transport
+ protocol. Say Y if unsure.
+
+config IP_VS_PROTO_UDP
+ bool "UDP load balancing support"
+ help
+ This option enables support for load balancing UDP transport
+ protocol. Say Y if unsure.
+
+config IP_VS_PROTO_AH_ESP
+ def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH
+
+config IP_VS_PROTO_ESP
+ bool "ESP load balancing support"
+ help
+ This option enables support for load balancing ESP (Encapsulation
+ Security Payload) transport protocol. Say Y if unsure.
+
+config IP_VS_PROTO_AH
+ bool "AH load balancing support"
+ help
+ This option enables support for load balancing AH (Authentication
+ Header) transport protocol. Say Y if unsure.
+
+config IP_VS_PROTO_SCTP
+ bool "SCTP load balancing support"
+ select LIBCRC32C
+ help
+ This option enables support for load balancing SCTP transport
+ protocol. Say Y if unsure.
+
+comment "IPVS scheduler"
+
+config IP_VS_RR
+ tristate "round-robin scheduling"
+ help
+ The round-robin scheduling algorithm simply directs network
+ connections to different real servers in a round-robin manner.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WRR
+ tristate "weighted round-robin scheduling"
+ help
+ The weighted round-robin scheduling algorithm directs network
+ connections to different real servers based on server weights
+ in a round-robin manner. Servers with higher weights receive
+ new connections before those with lower weights and get more
+ connections than those with lower weights; servers with equal
+ weights get an equal share of new connections.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LC
+ tristate "least-connection scheduling"
+ help
+ The least-connection scheduling algorithm directs network
+ connections to the server with the least number of active
+ connections.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WLC
+ tristate "weighted least-connection scheduling"
+ help
+ The weighted least-connection scheduling algorithm directs network
+ connections to the server with the least active connections
+ normalized by the server weight.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_FO
+ tristate "weighted failover scheduling"
+ help
+ The weighted failover scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_OVF
+ tristate "weighted overflow scheduling"
+ help
+ The weighted overflow scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available and overflows to the next when active
+ connections exceed the node's weight.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LBLC
+ tristate "locality-based least-connection scheduling"
+ help
+ The locality-based least-connection scheduling algorithm is for
+ destination IP load balancing. It is usually used in cache clusters.
+ This algorithm usually directs packets destined for an IP address to
+ its server if the server is alive and not overloaded. If the server
+ is overloaded (its number of active connections is larger than its
+ weight) and there is a server at half of its load, the weighted
+ least-connection server is allocated to this IP address instead.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LBLCR
+ tristate "locality-based least-connection with replication scheduling"
+ help
+ The locality-based least-connection with replication scheduling
+ algorithm is also for destination IP load balancing. It is
+ usually used in cache clusters. It differs from the LBLC scheduling
+ as follows: the load balancer maintains mappings from a target
+ to a set of server nodes that can serve the target. Requests for
+ a target are assigned to the least-connection node in the target's
+ server set. If all the nodes in the server set are overloaded,
+ it picks the least-connection node in the cluster and adds it
+ to the server set for the target. If the server set has not been
+ modified for the specified time, the most loaded node is removed
+ from the server set, in order to avoid a high degree of replication.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_DH
+ tristate "destination hashing scheduling"
+ help
+ The destination hashing scheduling algorithm assigns network
+ connections to the servers through looking up a statically assigned
+ hash table by their destination IP addresses.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_SH
+ tristate "source hashing scheduling"
+ help
+ The source hashing scheduling algorithm assigns network
+ connections to the servers through looking up a statically assigned
+ hash table by their source IP addresses.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_MH
+ tristate "maglev hashing scheduling"
+ help
+ The Maglev consistent hashing scheduling algorithm provides
+ Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
+ network connections to the servers through looking up a statically
+ assigned special hash table called the lookup table. Maglev hashing
+ assigns a preference list over all the lookup table positions
+ to each destination.
+
+ Through this operation, Maglev hashing gives an almost equal
+ share of the lookup table to each of the destinations and provides
+ minimal disruption by using the lookup table. When the set of
+ destinations changes, a connection will likely be sent to the same
+ destination as it was before.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_SED
+ tristate "shortest expected delay scheduling"
+ help
+ The shortest expected delay scheduling algorithm assigns network
+ connections to the server with the shortest expected delay. The
+ expected delay that the job will experience is (Ci + 1) / Ui if
+ sent to the ith server, in which Ci is the number of connections
+ on the ith server and Ui is the fixed service rate (weight)
+ of the ith server.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
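[Editor's note] The (Ci + 1) / Ui metric above can be compared without division by cross-multiplying, which is also how IPVS schedulers generally compare weighted loads. A minimal sketch (illustrative helper; assumes all weights are positive):

    /* Pick the server minimizing (C + 1) / U without floating point:
     * (Ci+1)/Ui < (Cb+1)/Ub  <=>  (Ci+1)*Ub < (Cb+1)*Ui  for U > 0. */
    static int sed_pick(const int *conns, const int *weight, int n)
    {
        int best = 0;

        for (int i = 1; i < n; i++)
            if ((long)(conns[i] + 1) * weight[best] <
                (long)(conns[best] + 1) * weight[i])
                best = i;
        return best;
    }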
+config IP_VS_NQ
+ tristate "never queue scheduling"
+ help
+ The never queue scheduling algorithm adopts a two-speed model.
+ When there is an idle server available, the job will be sent to
+ the idle server, instead of waiting for a fast one. When there
+ is no idle server available, the job will be sent to the server
+ that minimizes its expected delay (the shortest expected delay
+ scheduling algorithm).
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_TWOS
+ tristate "weighted random twos choice least-connection scheduling"
+ help
+ The weighted random twos choice least-connection scheduling
+ algorithm picks two random real servers and directs network
+ connections to the server with the least active connections
+ normalized by the server weight.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
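[Editor's note] A condensed sketch of the two-choices idea described above (illustrative, not the kernel implementation; assumes n >= 2 and positive weights):

    #include <stdlib.h>

    /* Sample two distinct random servers, keep the one with fewer active
     * connections per unit of weight (compared by cross-multiplication). */
    static int twos_pick(const int *conns, const int *weight, int n)
    {
        int a = rand() % n;
        int b = (a + 1 + rand() % (n - 1)) % n;    /* guaranteed != a */

        return (long)conns[a] * weight[b] <= (long)conns[b] * weight[a] ? a : b;
    }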
+comment "IPVS SH scheduler"
+
+config IP_VS_SH_TAB_BITS
+ int "IPVS source hashing table size (the Nth power of 2)"
+ range 4 20
+ default 8
+ help
+ The source hashing scheduler maps source IPs to destinations
+ stored in a hash table. This table is tiled by each destination
+ until all slots in the table are filled. When using weights to
+ allow destinations to receive more connections, the table is
+ tiled an amount proportional to the weights specified. The table
+ needs to be large enough to effectively fit all the destinations
+ multiplied by their respective weights.
+
+comment "IPVS MH scheduler"
+
+config IP_VS_MH_TAB_INDEX
+ int "IPVS maglev hashing table index of size (the prime numbers)"
+ range 8 17
+ default 12
+ help
+ The maglev hashing scheduler maps source IPs to destinations
+ stored in a hash table. This table is assigned by a preference
+ list of the positions to each destination until all slots in
+ the table are filled. The index determines the prime used as the
+ size of the table: 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+ 65521 or 131071. When using weights to allow destinations to
+ receive more connections, the table is assigned an amount
+ proportional to the weights specified. The table needs to be large
+ enough to effectively fit all the destinations multiplied by their
+ respective weights.
+
+comment "IPVS application helper"
+
+config IP_VS_FTP
+ tristate "FTP protocol helper"
+ depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
+ NF_CONNTRACK_FTP
+ select IP_VS_NFCT
+ help
+ FTP is a protocol that transfers IP addresses and/or port numbers
+ in the payload. In virtual server via Network Address Translation,
+ the IP address and port number of real servers cannot be sent to
+ clients in FTP connections directly, so the FTP protocol helper is
+ required to track the connection and mangle the payload back to
+ that of the virtual service.
+
+ If you want to compile it into the kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_NFCT
+ bool "Netfilter connection tracking"
+ depends on NF_CONNTRACK
+ help
+ The Netfilter connection tracking support allows the IPVS
+ connection state to be exported to the Netfilter framework
+ for filtering purposes.
+
+config IP_VS_PE_SIP
+ tristate "SIP persistence engine"
+ depends on IP_VS_PROTO_UDP
+ depends on NF_CONNTRACK_SIP
+ help
+ Allow persistence based on the SIP Call-ID.
+
+endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
new file mode 100644
index 000000000..bb5d8125c
--- /dev/null
+++ b/net/netfilter/ipvs/Makefile
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+
+# IPVS transport protocol load balancing support
+ip_vs_proto-objs-y :=
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
+
+ip_vs-extra_objs-y :=
+ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
+
+ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
+ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
+ ip_vs_est.o ip_vs_proto.o ip_vs_pe.o \
+ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
+
+
+# IPVS core
+obj-$(CONFIG_IP_VS) += ip_vs.o
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
+obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_TWOS) += ip_vs_twos.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
+
+# IPVS connection template retrievers
+obj-$(CONFIG_IP_VS_PE_SIP) += ip_vs_pe_sip.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
new file mode 100644
index 000000000..fdacbc3c1
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_app.c: Application module support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that the ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses).
+ *
+ * IP_MASQ_APP application masquerading module
+ *
+ * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+EXPORT_SYMBOL(register_ip_vs_app_inc);
+
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
+/*
+ * Get an ip_vs_app object
+ */
+static inline int ip_vs_app_get(struct ip_vs_app *app)
+{
+ return try_module_get(app->module);
+}
+
+
+static inline void ip_vs_app_put(struct ip_vs_app *app)
+{
+ module_put(app->module);
+}
+
+static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
+{
+ kfree(inc->timeout_table);
+ kfree(inc);
+}
+
+static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
+
+ ip_vs_app_inc_destroy(inc);
+}
+
+/*
+ * Allocate/initialize app incarnation and register it in proto apps.
+ */
+static int
+ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
+{
+ struct ip_vs_protocol *pp;
+ struct ip_vs_app *inc;
+ int ret;
+
+ if (!(pp = ip_vs_proto_get(proto)))
+ return -EPROTONOSUPPORT;
+
+ if (!pp->unregister_app)
+ return -EOPNOTSUPP;
+
+ inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
+ if (!inc)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&inc->p_list);
+ INIT_LIST_HEAD(&inc->incs_list);
+ inc->app = app;
+ inc->port = htons(port);
+ atomic_set(&inc->usecnt, 0);
+
+ if (app->timeouts) {
+ inc->timeout_table =
+ ip_vs_create_timeout_table(app->timeouts,
+ app->timeouts_size);
+ if (!inc->timeout_table) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = pp->register_app(ipvs, inc);
+ if (ret)
+ goto out;
+
+ list_add(&inc->a_list, &app->incs_list);
+ IP_VS_DBG(9, "%s App %s:%u registered\n",
+ pp->name, inc->name, ntohs(inc->port));
+
+ return 0;
+
+ out:
+ ip_vs_app_inc_destroy(inc);
+ return ret;
+}
+
+
+/*
+ * Release app incarnation
+ */
+static void
+ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_protocol *pp;
+
+ if (!(pp = ip_vs_proto_get(inc->protocol)))
+ return;
+
+ if (pp->unregister_app)
+ pp->unregister_app(ipvs, inc);
+
+ IP_VS_DBG(9, "%s App %s:%u unregistered\n",
+ pp->name, inc->name, ntohs(inc->port));
+
+ list_del(&inc->a_list);
+
+ call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
+}
+
+
+/*
+ * Get reference to app inc (only called from softirq)
+ */
+int ip_vs_app_inc_get(struct ip_vs_app *inc)
+{
+ int result;
+
+ result = ip_vs_app_get(inc->app);
+ if (result)
+ atomic_inc(&inc->usecnt);
+ return result;
+}
+
+
+/*
+ * Put the app inc (only called from timer or net softirq)
+ */
+void ip_vs_app_inc_put(struct ip_vs_app *inc)
+{
+ atomic_dec(&inc->usecnt);
+ ip_vs_app_put(inc->app);
+}
+
+
+/*
+ * Register an application incarnation in protocol applications
+ */
+int
+register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
+{
+ int result;
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ result = ip_vs_app_inc_new(ipvs, app, proto, port);
+
+ mutex_unlock(&__ip_vs_app_mutex);
+
+ return result;
+}
+
+
+/* Register application for netns */
+struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
+{
+ struct ip_vs_app *a;
+ int err = 0;
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc()) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ list_for_each_entry(a, &ipvs->app_list, a_list) {
+ if (!strcmp(app->name, a->name)) {
+ err = -EEXIST;
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ goto out_unlock;
+ }
+ }
+ a = kmemdup(app, sizeof(*app), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ goto out_unlock;
+ }
+ INIT_LIST_HEAD(&a->incs_list);
+ list_add(&a->a_list, &ipvs->app_list);
+
+out_unlock:
+ mutex_unlock(&__ip_vs_app_mutex);
+
+ return err ? ERR_PTR(err) : a;
+}
+
+
+/*
+ * ip_vs_app unregistration routine
+ * We are sure there are no app incarnations attached to services
+ * Caller should use synchronize_rcu() or rcu_barrier()
+ */
+void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
+{
+ struct ip_vs_app *a, *anxt, *inc, *nxt;
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
+ if (app && strcmp(app->name, a->name))
+ continue;
+ list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
+ ip_vs_app_inc_release(ipvs, inc);
+ }
+
+ list_del(&a->a_list);
+ kfree(a);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ }
+
+ mutex_unlock(&__ip_vs_app_mutex);
+}
+
+
+/*
+ * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
+ */
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ return pp->app_conn_bind(cp);
+}
+
+
+/*
+ * Unbind cp from application incarnation (called by cp destructor)
+ */
+void ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+ struct ip_vs_app *inc = cp->app;
+
+ if (!inc)
+ return;
+
+ if (inc->unbind_conn)
+ inc->unbind_conn(inc, cp);
+ if (inc->done_conn)
+ inc->done_conn(inc, cp);
+ ip_vs_app_inc_put(inc);
+ cp->app = NULL;
+}
+
+
+/*
+ * Fixes th->seq based on ip_vs_seq info.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+ __u32 seq = ntohl(th->seq);
+
+ /*
+ * Adjust seq with delta-offset for all packets after
+ * the most recent resized pkt seq and with previous_delta offset
+ * for all packets before most recent resized pkt seq.
+ */
+ if (vseq->delta || vseq->previous_delta) {
+ if(after(seq, vseq->init_seq)) {
+ th->seq = htonl(seq + vseq->delta);
+ IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
+ __func__, vseq->delta);
+ } else {
+ th->seq = htonl(seq + vseq->previous_delta);
+ IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
+ __func__, vseq->previous_delta);
+ }
+ }
+}
+
+
+/*
+ * Fixes th->ack_seq based on ip_vs_seq info.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+ __u32 ack_seq = ntohl(th->ack_seq);
+
+ /*
+ * Adjust ack_seq with delta-offset for
+ * the packets AFTER most recent resized pkt has caused a shift
+ * for packets before most recent resized pkt, use previous_delta
+ */
+ if (vseq->delta || vseq->previous_delta) {
+ /* ack_seq is the number of the next octet the sender of this
+ * packet expects to receive, so compare it with init_seq+delta */
+ if(after(ack_seq, vseq->init_seq+vseq->delta)) {
+ th->ack_seq = htonl(ack_seq - vseq->delta);
+ IP_VS_DBG(9, "%s(): subtracted delta "
+ "(%d) from ack_seq\n", __func__, vseq->delta);
+
+ } else {
+ th->ack_seq = htonl(ack_seq - vseq->previous_delta);
+ IP_VS_DBG(9, "%s(): subtracted "
+ "previous_delta (%d) from ack_seq\n",
+ __func__, vseq->previous_delta);
+ }
+ }
+}
+
+
+/*
+ * Updates ip_vs_seq if pkt has been resized
+ * Assumes already checked proto==IPPROTO_TCP and diff!=0.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+ unsigned int flag, __u32 seq, int diff)
+{
+ /* spinlock is to keep updating cp->flags atomic */
+ spin_lock_bh(&cp->lock);
+ if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
+ vseq->previous_delta = vseq->delta;
+ vseq->delta += diff;
+ vseq->init_seq = seq;
+ cp->flags |= flag;
+ }
+ spin_unlock_bh(&cp->lock);
+}
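[Editor's note] A worked example of the bookkeeping above, with illustrative numbers: after a helper grew a payload by 10 bytes at sequence 1000 and by 5 more at sequence 3000, the state is previous_delta = 10, delta = 15, init_seq = 3000, and vs_fix_seq() effectively computes:

    /* Simplified (plain '>' instead of the wraparound-aware after()): */
    static unsigned int fix_seq_example(unsigned int seq)
    {
        const unsigned int init_seq = 3000, delta = 15, previous_delta = 10;

        return seq > init_seq ? seq + delta            /* e.g. 4000 -> 4015 */
                              : seq + previous_delta;  /* e.g. 2000 -> 2010 */
    }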
+
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_app *app,
+ struct ip_vs_iphdr *ipvsh)
+{
+ int diff;
+ const unsigned int tcp_offset = ip_hdrlen(skb);
+ struct tcphdr *th;
+ __u32 seq;
+
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
+ return 0;
+
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+ /*
+ * Remember seq number in case this pkt gets resized
+ */
+ seq = ntohl(th->seq);
+
+ /*
+ * Fix seq stuff if flagged as so.
+ */
+ if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+ vs_fix_seq(&cp->out_seq, th);
+ if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+ vs_fix_ack_seq(&cp->in_seq, th);
+
+ /*
+ * Call private output hook function
+ */
+ if (app->pkt_out == NULL)
+ return 1;
+
+ if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
+ return 0;
+
+ /*
+ * Update ip_vs seq stuff if len has changed.
+ */
+ if (diff != 0)
+ vs_seq_update(cp, &cp->out_seq,
+ IP_VS_CONN_F_OUT_SEQ, seq, diff);
+
+ return 1;
+}
+
+/*
+ * Output pkt hook. Will call bound ip_vs_app specific function.
+ * Called by the ipvs packet handler; assumes previously checked cp!=NULL.
+ * Returns false if it can't handle the packet (oom).
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh)
+{
+ struct ip_vs_app *app;
+
+ /*
+ * check if application module is bound to
+ * this ip_vs_conn.
+ */
+ if ((app = cp->app) == NULL)
+ return 1;
+
+ /* TCP is complicated */
+ if (cp->protocol == IPPROTO_TCP)
+ return app_tcp_pkt_out(cp, skb, app, ipvsh);
+
+ /*
+ * Call private output hook function
+ */
+ if (app->pkt_out == NULL)
+ return 1;
+
+ return app->pkt_out(app, cp, skb, NULL, ipvsh);
+}
+
+
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_app *app,
+ struct ip_vs_iphdr *ipvsh)
+{
+ int diff;
+ const unsigned int tcp_offset = ip_hdrlen(skb);
+ struct tcphdr *th;
+ __u32 seq;
+
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
+ return 0;
+
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+ /*
+ * Remember seq number in case this pkt gets resized
+ */
+ seq = ntohl(th->seq);
+
+ /*
+ * Fix seq stuff if flagged as so.
+ */
+ if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+ vs_fix_seq(&cp->in_seq, th);
+ if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+ vs_fix_ack_seq(&cp->out_seq, th);
+
+ /*
+ * Call private input hook function
+ */
+ if (app->pkt_in == NULL)
+ return 1;
+
+ if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
+ return 0;
+
+ /*
+ * Update ip_vs seq stuff if len has changed.
+ */
+ if (diff != 0)
+ vs_seq_update(cp, &cp->in_seq,
+ IP_VS_CONN_F_IN_SEQ, seq, diff);
+
+ return 1;
+}
+
+/*
+ * Input pkt hook. Will call bound ip_vs_app specific function.
+ * Called by the ipvs packet handler; assumes previously checked cp!=NULL.
+ * Returns false if it can't handle the packet (oom).
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh)
+{
+ struct ip_vs_app *app;
+
+ /*
+ * check if application module is bound to
+ * this ip_vs_conn.
+ */
+ if ((app = cp->app) == NULL)
+ return 1;
+
+ /* TCP is complicated */
+ if (cp->protocol == IPPROTO_TCP)
+ return app_tcp_pkt_in(cp, skb, app, ipvsh);
+
+ /*
+ * Call private input hook function
+ */
+ if (app->pkt_in == NULL)
+ return 1;
+
+ return app->pkt_in(app, cp, skb, NULL, ipvsh);
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ * /proc/net/ip_vs_app entry function
+ */
+
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
+{
+ struct ip_vs_app *app, *inc;
+
+ list_for_each_entry(app, &ipvs->app_list, a_list) {
+ list_for_each_entry(inc, &app->incs_list, a_list) {
+ if (pos-- == 0)
+ return inc;
+ }
+ }
+ return NULL;
+
+}
+
+static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ip_vs_app *inc, *app;
+ struct list_head *e;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return ip_vs_app_idx(ipvs, 0);
+
+ inc = v;
+ app = inc->app;
+
+ if ((e = inc->a_list.next) != &app->incs_list)
+ return list_entry(e, struct ip_vs_app, a_list);
+
+ /* go on to next application */
+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
+ app = list_entry(e, struct ip_vs_app, a_list);
+ list_for_each_entry(inc, &app->incs_list, a_list) {
+ return inc;
+ }
+ }
+ return NULL;
+}
+
+static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
+{
+ mutex_unlock(&__ip_vs_app_mutex);
+}
+
+static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "prot port usecnt name\n");
+ else {
+ const struct ip_vs_app *inc = v;
+
+ seq_printf(seq, "%-3s %-7u %-6d %-17s\n",
+ ip_vs_proto_name(inc->protocol),
+ ntohs(inc->port),
+ atomic_read(&inc->usecnt),
+ inc->name);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_app_seq_ops = {
+ .start = ip_vs_app_seq_start,
+ .next = ip_vs_app_seq_next,
+ .stop = ip_vs_app_seq_stop,
+ .show = ip_vs_app_seq_show,
+};
+#endif
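[Editor's note] For illustration, with the FTP helper registered on its default port 21, a /proc/net/ip_vs_app listing built by the seq_show format string above would look like (illustrative entry):

    prot port usecnt name
    tcp 21      0      ftp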
+
+int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
+{
+ INIT_LIST_HEAD(&ipvs->app_list);
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
+ &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private)))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
+{
+ unregister_ip_vs_app(ipvs, NULL /* all */);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
+#endif
+}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
new file mode 100644
index 000000000..e1b9b5290
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -0,0 +1,1538 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Many code here is taken from IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/net.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h> /* for proc_net_* */
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#include <net/net_namespace.h>
+#include <net/ip_vs.h>
+
+
+#ifndef CONFIG_IP_VS_TAB_BITS
+#define CONFIG_IP_VS_TAB_BITS 12
+#endif
+
+/*
+ * Connection hash size. Default is what was selected at compile time.
+ */
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
+MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
+
+/* size and mask values */
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
+
+/*
+ * Connection hash table: for input and output packets lookups of IPVS
+ */
+static struct hlist_head *ip_vs_conn_tab __read_mostly;
+
+/* SLAB cache for IPVS connections */
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
+
+/* counter for no client port connections */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+/* random value for IPVS connection hash */
+static unsigned int ip_vs_conn_rnd __read_mostly;
+
+/*
+ * Fine locking granularity for big connection hash table
+ */
+#define CT_LOCKARRAY_BITS 5
+#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+
+/* We need an addrstrlen that works with or without v6 */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
+#else
+#define IP_VS_ADDRSTRLEN (8+1)
+#endif
+
+struct ip_vs_aligned_lock
+{
+ spinlock_t l;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table */
+static struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+static inline void ct_write_lock_bh(unsigned int key)
+{
+ spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock_bh(unsigned int key)
+{
+ spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static void ip_vs_conn_expire(struct timer_list *t);
+
+/*
+ * Returns hash value for IPVS connection entry
+ */
+static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
+ const union nf_inet_addr *addr,
+ __be16 port)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd) ^
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
+#endif
+ return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd) ^
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
+}
+
+static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
+ bool inverse)
+{
+ const union nf_inet_addr *addr;
+ __be16 port;
+
+ if (p->pe_data && p->pe->hashkey_raw)
+ return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) &
+ ip_vs_conn_tab_mask;
+
+ if (likely(!inverse)) {
+ addr = p->caddr;
+ port = p->cport;
+ } else {
+ addr = p->vaddr;
+ port = p->vport;
+ }
+
+ return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port);
+}
+
+static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
+{
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol,
+ &cp->caddr, cp->cport, NULL, 0, &p);
+
+ if (cp->pe) {
+ p.pe = cp->pe;
+ p.pe_data = cp->pe_data;
+ p.pe_data_len = cp->pe_data_len;
+ }
+
+ return ip_vs_conn_hashkey_param(&p, false);
+}
+
+/*
+ * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
+ * returns bool success.
+ */
+static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+ unsigned int hash;
+ int ret;
+
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return 0;
+
+ /* Hash by protocol, client address and port */
+ hash = ip_vs_conn_hashkey_conn(cp);
+
+ ct_write_lock_bh(hash);
+ spin_lock(&cp->lock);
+
+ if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
+ cp->flags |= IP_VS_CONN_F_HASHED;
+ refcount_inc(&cp->refcnt);
+ hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
+ ret = 1;
+ } else {
+ pr_err("%s(): request for already hashed, called from %pS\n",
+ __func__, __builtin_return_address(0));
+ ret = 0;
+ }
+
+ spin_unlock(&cp->lock);
+ ct_write_unlock_bh(hash);
+
+ return ret;
+}
+
+
+/*
+ * UNhashes ip_vs_conn from ip_vs_conn_tab.
+ * returns bool success. Caller should hold conn reference.
+ */
+static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+ unsigned int hash;
+ int ret;
+
+ /* unhash it and decrease its reference counter */
+ hash = ip_vs_conn_hashkey_conn(cp);
+
+ ct_write_lock_bh(hash);
+ spin_lock(&cp->lock);
+
+ if (cp->flags & IP_VS_CONN_F_HASHED) {
+ hlist_del_rcu(&cp->c_list);
+ cp->flags &= ~IP_VS_CONN_F_HASHED;
+ refcount_dec(&cp->refcnt);
+ ret = 1;
+ } else
+ ret = 0;
+
+ spin_unlock(&cp->lock);
+ ct_write_unlock_bh(hash);
+
+ return ret;
+}
+
+/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
+ * returns bool success.
+ */
+static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
+{
+ unsigned int hash;
+ bool ret = false;
+
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return refcount_dec_if_one(&cp->refcnt);
+
+ hash = ip_vs_conn_hashkey_conn(cp);
+
+ ct_write_lock_bh(hash);
+ spin_lock(&cp->lock);
+
+ if (cp->flags & IP_VS_CONN_F_HASHED) {
+ /* Decrease refcnt and unlink conn only if we are last user */
+ if (refcount_dec_if_one(&cp->refcnt)) {
+ hlist_del_rcu(&cp->c_list);
+ cp->flags &= ~IP_VS_CONN_F_HASHED;
+ ret = true;
+ }
+ }
+
+ spin_unlock(&cp->lock);
+ ct_write_unlock_bh(hash);
+
+ return ret;
+}
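[Editor's note] The refcount_dec_if_one() call above makes unlink-and-free race-free: the entry is only unhashed when the caller holds the sole remaining reference, otherwise the unlink fails and the caller can retry later. A userspace sketch of that primitive's semantics (C11 atomics; the kernel's refcount_t additionally saturates on overflow):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Atomically 1 -> 0: succeeds only when no one else holds a reference. */
    static bool dec_if_one(atomic_int *r)
    {
        int expected = 1;

        return atomic_compare_exchange_strong(r, &expected, 0);
    }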
+
+
+/*
+ * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ * Called for pkts coming from OUTside-to-INside.
+ * p->caddr, p->cport: pkt source address (foreign host)
+ * p->vaddr, p->vport: pkt dest address (load balancer)
+ */
+static inline struct ip_vs_conn *
+__ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
+{
+ unsigned int hash;
+ struct ip_vs_conn *cp;
+
+ hash = ip_vs_conn_hashkey_param(p, false);
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->cport == cp->cport && p->vport == cp->vport &&
+ cp->af == p->af &&
+ ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
+ ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
+ ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
+ p->protocol == cp->protocol &&
+ cp->ipvs == p->ipvs) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
+ /* HIT */
+ rcu_read_unlock();
+ return cp;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
+{
+ struct ip_vs_conn *cp;
+
+ cp = __ip_vs_conn_in_get(p);
+ if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
+ struct ip_vs_conn_param cport_zero_p = *p;
+ cport_zero_p.cport = 0;
+ cp = __ip_vs_conn_in_get(&cport_zero_p);
+ }
+
+ IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(p->protocol),
+ IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
+ IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
+ cp ? "hit" : "not hit");
+
+ return cp;
+}
+
+static int
+ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
+ int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ struct ip_vs_conn_param *p)
+{
+ __be16 _ports[2], *pptr;
+
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return 1;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr,
+ pptr[0], &iph->daddr, pptr[1], p);
+ else
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr,
+ pptr[1], &iph->saddr, pptr[0], p);
+ return 0;
+}
+
+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_conn_param p;
+
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
+ return NULL;
+
+ return ip_vs_conn_in_get(&p);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
+
+/* Get reference to connection template */
+struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
+{
+ unsigned int hash;
+ struct ip_vs_conn *cp;
+
+ hash = ip_vs_conn_hashkey_param(p, false);
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (unlikely(p->pe_data && p->pe->ct_match)) {
+ if (cp->ipvs != p->ipvs)
+ continue;
+ if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
+ continue;
+ }
+
+ if (cp->af == p->af &&
+ ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
+ /* protocol should only be IPPROTO_IP if
+ * p->vaddr is a fwmark */
+ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
+ p->af, p->vaddr, &cp->vaddr) &&
+ p->vport == cp->vport && p->cport == cp->cport &&
+ cp->flags & IP_VS_CONN_F_TEMPLATE &&
+ p->protocol == cp->protocol &&
+ cp->ipvs == p->ipvs) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
+ }
+ cp = NULL;
+
+ out:
+ rcu_read_unlock();
+
+ IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(p->protocol),
+ IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
+ IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
+ cp ? "hit" : "not hit");
+
+ return cp;
+}
+
+/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ * Called for pkts coming from inside-to-OUTside.
+ * p->caddr, p->cport: pkt source address (inside host)
+ * p->vaddr, p->vport: pkt dest address (foreign host) */
+struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
+{
+ unsigned int hash;
+	struct ip_vs_conn *cp, *ret = NULL;
+ const union nf_inet_addr *saddr;
+ __be16 sport;
+
+ /*
+ * Check for "full" addressed entries
+ */
+ hash = ip_vs_conn_hashkey_param(p, true);
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->vport != cp->cport)
+ continue;
+
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ sport = cp->vport;
+ saddr = &cp->vaddr;
+ } else {
+ sport = cp->dport;
+ saddr = &cp->daddr;
+ }
+
+ if (p->cport == sport && cp->af == p->af &&
+ ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
+ ip_vs_addr_equal(p->af, p->caddr, saddr) &&
+ p->protocol == cp->protocol &&
+ cp->ipvs == p->ipvs) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
+ /* HIT */
+ ret = cp;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(p->protocol),
+ IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
+ IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
+ ret ? "hit" : "not hit");
+
+ return ret;
+}
+
+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_conn_param p;
+
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
+ return NULL;
+
+ return ip_vs_conn_out_get(&p);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
+
+/*
+ * Put back the conn and restart its timer with its timeout
+ */
+static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
+{
+ unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
+ 0 : cp->timeout;
+ mod_timer(&cp->timer, jiffies+t);
+
+ __ip_vs_conn_put(cp);
+}
+
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
+ (refcount_read(&cp->refcnt) == 1) &&
+ !timer_pending(&cp->timer))
+ /* expire connection immediately */
+ ip_vs_conn_expire(&cp->timer);
+ else
+ __ip_vs_conn_put_timer(cp);
+}
+
+/*
+ * Fill a no_client_port connection with a client port number
+ */
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+{
+ if (ip_vs_conn_unhash(cp)) {
+ spin_lock_bh(&cp->lock);
+ if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+ atomic_dec(&ip_vs_conn_no_cport_cnt);
+ cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+ cp->cport = cport;
+ }
+ spin_unlock_bh(&cp->lock);
+
+		/* hash on new cport */
+ ip_vs_conn_hash(cp);
+ }
+}
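
Because cport feeds the hash key, it can only be changed through the
unhash, modify, rehash sequence used above. A generic sketch of the pattern
(table_unhash() and table_hash() are hypothetical stand-ins for
ip_vs_conn_unhash()/ip_vs_conn_hash()):

    #include <stdbool.h>

    struct node {
            unsigned int key;       /* field that feeds the hash function */
    };

    bool table_unhash(struct node *n);      /* hypothetical helpers */
    void table_hash(struct node *n);

    static void change_key(struct node *n, unsigned int new_key)
    {
            if (table_unhash(n)) {          /* fails if another user holds it */
                    n->key = new_key;       /* safe: unreachable via the table */
                    table_hash(n);          /* reinsert under the new key */
            }
    }
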
+
+
+/*
+ * Bind a connection entry with the corresponding packet_xmit.
+ * Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+ switch (IP_VS_FWD_METHOD(cp)) {
+ case IP_VS_CONN_F_MASQ:
+ cp->packet_xmit = ip_vs_nat_xmit;
+ break;
+
+ case IP_VS_CONN_F_TUNNEL:
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+#endif
+ cp->packet_xmit = ip_vs_tunnel_xmit;
+ break;
+
+ case IP_VS_CONN_F_DROUTE:
+ cp->packet_xmit = ip_vs_dr_xmit;
+ break;
+
+ case IP_VS_CONN_F_LOCALNODE:
+ cp->packet_xmit = ip_vs_null_xmit;
+ break;
+
+ case IP_VS_CONN_F_BYPASS:
+ cp->packet_xmit = ip_vs_bypass_xmit;
+ break;
+ }
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+ switch (IP_VS_FWD_METHOD(cp)) {
+ case IP_VS_CONN_F_MASQ:
+ cp->packet_xmit = ip_vs_nat_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_TUNNEL:
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+ cp->packet_xmit = ip_vs_tunnel_xmit;
+ break;
+
+ case IP_VS_CONN_F_DROUTE:
+ cp->packet_xmit = ip_vs_dr_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_LOCALNODE:
+ cp->packet_xmit = ip_vs_null_xmit;
+ break;
+
+ case IP_VS_CONN_F_BYPASS:
+ cp->packet_xmit = ip_vs_bypass_xmit_v6;
+ break;
+ }
+}
+#endif
+
+
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+ return atomic_read(&dest->activeconns)
+ + atomic_read(&dest->inactconns);
+}
+
+/*
+ * Bind a connection entry with a virtual service destination
+ * Called just after a new connection entry is created.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+ unsigned int conn_flags;
+ __u32 flags;
+
+ /* if dest is NULL, then return directly */
+ if (!dest)
+ return;
+
+ /* Increase the refcnt counter of the dest */
+ ip_vs_dest_hold(dest);
+
+ conn_flags = atomic_read(&dest->conn_flags);
+ if (cp->protocol != IPPROTO_UDP)
+ conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
+ flags = cp->flags;
+ /* Bind with the destination and its corresponding transmitter */
+ if (flags & IP_VS_CONN_F_SYNC) {
+		/* If the connection is not a template and was created
+		 * by sync, preserve the activity flag.
+		 */
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ conn_flags &= ~IP_VS_CONN_F_INACTIVE;
+ /* connections inherit forwarding method from dest */
+ flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
+ }
+ flags |= conn_flags;
+ cp->flags = flags;
+ cp->dest = dest;
+
+ IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
+
+ /* Update the connection counters */
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ /* It is a normal connection, so modify the counters
+ * according to the flags, later the protocol can
+ * update them on state change
+ */
+ if (!(flags & IP_VS_CONN_F_INACTIVE))
+ atomic_inc(&dest->activeconns);
+ else
+ atomic_inc(&dest->inactconns);
+ } else {
+ /* It is a persistent connection/template, so increase
+ the persistent connection counter */
+ atomic_inc(&dest->persistconns);
+ }
+
+ if (dest->u_threshold != 0 &&
+ ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+ dest->flags |= IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+ struct ip_vs_dest *dest;
+
+ rcu_read_lock();
+
+ /* This function is only invoked by the synchronization code. We do
+ * not currently support heterogeneous pools with synchronization,
+ * so we can make the assumption that the svc_af is the same as the
+ * dest_af
+ */
+ dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr,
+ cp->dport, &cp->vaddr, cp->vport,
+ cp->protocol, cp->fwmark, cp->flags);
+ if (dest) {
+ struct ip_vs_proto_data *pd;
+
+ spin_lock_bh(&cp->lock);
+ if (cp->dest) {
+ spin_unlock_bh(&cp->lock);
+ rcu_read_unlock();
+ return;
+ }
+
+		/* Applications depend on the forwarding method, so always
+		 * reassign them when binding a new dest */
+ if (cp->app)
+ ip_vs_unbind_app(cp);
+
+ ip_vs_bind_dest(cp, dest);
+ spin_unlock_bh(&cp->lock);
+
+ /* Update its packet transmitter */
+ cp->packet_xmit = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ ip_vs_bind_xmit_v6(cp);
+ else
+#endif
+ ip_vs_bind_xmit(cp);
+
+ pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol);
+ if (pd && atomic_read(&pd->appcnt))
+ ip_vs_bind_app(cp, pd->pp);
+ }
+ rcu_read_unlock();
+}
+
+
+/*
+ * Unbind a connection entry with its VS destination
+ * Called by the ip_vs_conn_expire function.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+ struct ip_vs_dest *dest = cp->dest;
+
+ if (!dest)
+ return;
+
+ IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
+
+ /* Update the connection counters */
+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
+ /* It is a normal connection, so decrease the inactconns
+ or activeconns counter */
+ if (cp->flags & IP_VS_CONN_F_INACTIVE) {
+ atomic_dec(&dest->inactconns);
+ } else {
+ atomic_dec(&dest->activeconns);
+ }
+ } else {
+ /* It is a persistent connection/template, so decrease
+ the persistent connection counter */
+ atomic_dec(&dest->persistconns);
+ }
+
+ if (dest->l_threshold != 0) {
+ if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ } else if (dest->u_threshold != 0) {
+ if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ } else {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+
+ ip_vs_dest_put(dest);
+}
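
The overload flag is cleared with hysteresis: a configured lower threshold
is honored directly; otherwise the flag is dropped once the connection
count falls below three quarters of the upper threshold (totalconns * 4 <
u_threshold * 3 expresses "below 75%" without integer division). A small
standalone model of that decision (field names mirror the kernel struct,
but this is not kernel code):

    #include <stdbool.h>

    struct dest_model {
            int totalconns;         /* activeconns + inactconns */
            int l_threshold;        /* lower threshold, 0 = unset */
            int u_threshold;        /* upper threshold, 0 = unset */
    };

    static bool should_clear_overload(const struct dest_model *d)
    {
            if (d->l_threshold != 0)
                    return d->totalconns < d->l_threshold;
            if (d->u_threshold != 0)
                    /* below 75% of the upper threshold */
                    return d->totalconns * 4 < d->u_threshold * 3;
            return true;    /* no thresholds: never stay overloaded */
    }
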
+
+static int expire_quiescent_template(struct netns_ipvs *ipvs,
+ struct ip_vs_dest *dest)
+{
+#ifdef CONFIG_SYSCTL
+ return ipvs->sysctl_expire_quiescent_template &&
+ (atomic_read(&dest->weight) == 0);
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Check whether the destination of a connection template is available.
+ * If it is, return 1; otherwise invalidate the connection template
+ * and return 0.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest)
+{
+ struct ip_vs_dest *dest = ct->dest;
+ struct netns_ipvs *ipvs = ct->ipvs;
+
+ /*
+ * Checking the dest server status.
+ */
+ if ((dest == NULL) ||
+ !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+ expire_quiescent_template(ipvs, dest) ||
+ (cdest && (dest != cdest))) {
+ IP_VS_DBG_BUF(9, "check_template: dest not available for "
+ "protocol %s s:%s:%d v:%s:%d "
+ "-> d:%s:%d\n",
+ ip_vs_proto_name(ct->protocol),
+ IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+ ntohs(ct->cport),
+ IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+ ntohs(ct->vport),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
+ ntohs(ct->dport));
+
+ /*
+ * Invalidate the connection template
+ */
+ if (ct->vport != htons(0xffff)) {
+ if (ip_vs_conn_unhash(ct)) {
+ ct->dport = htons(0xffff);
+ ct->vport = htons(0xffff);
+ ct->cport = 0;
+ ip_vs_conn_hash(ct);
+ }
+ }
+
+ /*
+ * Simply decrease the refcnt of the template,
+ * don't restart its timer.
+ */
+ __ip_vs_conn_put(ct);
+ return 0;
+ }
+ return 1;
+}
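
Note the invalidation trick: rather than freeing a template that in-flight
packets may still look up, it is rehashed under an unmatchable tuple (vport
and dport forced to 0xffff, cport to 0), so subsequent lookups miss and the
template simply ages out through its timer.
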
+
+static void ip_vs_conn_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
+ rcu_head);
+
+ ip_vs_pe_put(cp->pe);
+ kfree(cp->pe_data);
+ kmem_cache_free(ip_vs_conn_cachep, cp);
+}
+
+/* Try to delete connection while not holding reference */
+static void ip_vs_conn_del(struct ip_vs_conn *cp)
+{
+ if (del_timer(&cp->timer)) {
+ /* Drop cp->control chain too */
+ if (cp->control)
+ cp->timeout = 0;
+ ip_vs_conn_expire(&cp->timer);
+ }
+}
+
+/* Try to delete connection while holding reference */
+static void ip_vs_conn_del_put(struct ip_vs_conn *cp)
+{
+ if (del_timer(&cp->timer)) {
+ /* Drop cp->control chain too */
+ if (cp->control)
+ cp->timeout = 0;
+ __ip_vs_conn_put(cp);
+ ip_vs_conn_expire(&cp->timer);
+ } else {
+ __ip_vs_conn_put(cp);
+ }
+}
+
+static void ip_vs_conn_expire(struct timer_list *t)
+{
+ struct ip_vs_conn *cp = from_timer(cp, t, timer);
+ struct netns_ipvs *ipvs = cp->ipvs;
+
+ /*
+ * do I control anybody?
+ */
+ if (atomic_read(&cp->n_control))
+ goto expire_later;
+
+ /* Unlink conn if not referenced anymore */
+ if (likely(ip_vs_conn_unlink(cp))) {
+ struct ip_vs_conn *ct = cp->control;
+
+ /* delete the timer if it is activated by other users */
+ del_timer(&cp->timer);
+
+ /* does anybody control me? */
+ if (ct) {
+ bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
+
+ ip_vs_control_del(cp);
+ /* Drop CTL or non-assured TPL if not used anymore */
+ if (has_ref && !atomic_read(&ct->n_control) &&
+ (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
+ !(ct->state & IP_VS_CTPL_S_ASSURED))) {
+ IP_VS_DBG(4, "drop controlling connection\n");
+ ip_vs_conn_del_put(ct);
+ } else if (has_ref) {
+ __ip_vs_conn_put(ct);
+ }
+ }
+
+ if ((cp->flags & IP_VS_CONN_F_NFCT) &&
+ !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
+ /* Do not access conntracks during subsys cleanup
+			 * because nf_conntrack_find_get cannot be used after
+ * conntrack cleanup for the net.
+ */
+ smp_rmb();
+ if (ipvs->enable)
+ ip_vs_conn_drop_conntrack(cp);
+ }
+
+ if (unlikely(cp->app != NULL))
+ ip_vs_unbind_app(cp);
+ ip_vs_unbind_dest(cp);
+ if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+ atomic_dec(&ip_vs_conn_no_cport_cnt);
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ ip_vs_conn_rcu_free(&cp->rcu_head);
+ else
+ call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
+ atomic_dec(&ipvs->conn_count);
+ return;
+ }
+
+ expire_later:
+ IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
+ refcount_read(&cp->refcnt),
+ atomic_read(&cp->n_control));
+
+ refcount_inc(&cp->refcnt);
+ cp->timeout = 60*HZ;
+
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
+
+ __ip_vs_conn_put_timer(cp);
+}
+
+/* Modify timer, so that it expires as soon as possible.
+ * Can be called without reference only if under RCU lock.
+ * We can have such chain of conns linked with ->control: DATA->CTL->TPL
+ * - DATA (eg. FTP) and TPL (persistence) can be present depending on setup
+ * - cp->timeout=0 indicates all conns from chain should be dropped but
+ * TPL is not dropped if in assured state
+ */
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+ /* Using mod_timer_pending will ensure the timer is not
+ * modified after the final del_timer in ip_vs_conn_expire.
+ */
+ if (timer_pending(&cp->timer) &&
+ time_after(cp->timer.expires, jiffies))
+ mod_timer_pending(&cp->timer, jiffies);
+}
+
+
+/*
+ * Create a new connection entry and hash it into the ip_vs_conn_tab
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
+ struct ip_vs_dest *dest, __u32 fwmark)
+{
+ struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = p->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
+ p->protocol);
+
+ cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ if (cp == NULL) {
+ IP_VS_ERR_RL("%s(): no memory\n", __func__);
+ return NULL;
+ }
+
+ INIT_HLIST_NODE(&cp->c_list);
+ timer_setup(&cp->timer, ip_vs_conn_expire, 0);
+ cp->ipvs = ipvs;
+ cp->af = p->af;
+ cp->daf = dest_af;
+ cp->protocol = p->protocol;
+ ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
+ cp->cport = p->cport;
+ /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
+ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+ &cp->vaddr, p->vaddr);
+ cp->vport = p->vport;
+ ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
+ cp->dport = dport;
+ cp->flags = flags;
+ cp->fwmark = fwmark;
+ if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+ ip_vs_pe_get(p->pe);
+ cp->pe = p->pe;
+ cp->pe_data = p->pe_data;
+ cp->pe_data_len = p->pe_data_len;
+ } else {
+ cp->pe = NULL;
+ cp->pe_data = NULL;
+ cp->pe_data_len = 0;
+ }
+ spin_lock_init(&cp->lock);
+
+ /*
+	 * Mark the entry as referenced by the current thread before hashing
+	 * it into the table, so that another thread running
+	 * ip_vs_random_dropentry cannot drop this entry.
+ */
+ refcount_set(&cp->refcnt, 1);
+
+ cp->control = NULL;
+ atomic_set(&cp->n_control, 0);
+ atomic_set(&cp->in_pkts, 0);
+
+ cp->packet_xmit = NULL;
+ cp->app = NULL;
+ cp->app_data = NULL;
+ /* reset struct ip_vs_seq */
+ cp->in_seq.delta = 0;
+ cp->out_seq.delta = 0;
+
+ atomic_inc(&ipvs->conn_count);
+ if (flags & IP_VS_CONN_F_NO_CPORT)
+ atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+ /* Bind the connection with a destination server */
+ cp->dest = NULL;
+ ip_vs_bind_dest(cp, dest);
+
+ /* Set its state and timeout */
+ cp->state = 0;
+ cp->old_state = 0;
+ cp->timeout = 3*HZ;
+ cp->sync_endtime = jiffies & ~3UL;
+
+ /* Bind its packet transmitter */
+#ifdef CONFIG_IP_VS_IPV6
+ if (p->af == AF_INET6)
+ ip_vs_bind_xmit_v6(cp);
+ else
+#endif
+ ip_vs_bind_xmit(cp);
+
+ if (unlikely(pd && atomic_read(&pd->appcnt)))
+ ip_vs_bind_app(cp, pd->pp);
+
+ /*
+ * Allow conntrack to be preserved. By default, conntrack
+ * is created and destroyed for every packet.
+ * Sometimes keeping conntrack can be useful for
+ * IP_VS_CONN_F_ONE_PACKET too.
+ */
+
+ if (ip_vs_conntrack_enabled(ipvs))
+ cp->flags |= IP_VS_CONN_F_NFCT;
+
+ /* Hash it in the ip_vs_conn_tab finally */
+ ip_vs_conn_hash(cp);
+
+ return cp;
+}
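
A typical creation sequence, matching what the scheduler does later in
ip_vs_core.c (a minimal sketch; ipvs, caddr/cport, vaddr/vport, dest and
skb are assumed to be in scope, and error handling is elided):

    struct ip_vs_conn_param p;
    struct ip_vs_conn *cp;

    ip_vs_conn_fill_param(ipvs, AF_INET, IPPROTO_TCP,
                          &caddr, cport, &vaddr, vport, &p);
    cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
                        dest->port ? dest->port : vport,
                        0 /* flags */, dest, skb->mark);
    if (cp)
            ip_vs_conn_put(cp);     /* drop our reference, arm the timer */
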
+
+/*
+ * /proc/net/ip_vs_conn entries
+ */
+#ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+ struct seq_net_private p;
+ struct hlist_head *l;
+};
+
+static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
+{
+ int idx;
+ struct ip_vs_conn *cp;
+ struct ip_vs_iter_state *iter = seq->private;
+
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ /* __ip_vs_conn_get() is not needed by
+ * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
+ */
+ if (pos-- == 0) {
+ iter->l = &ip_vs_conn_tab[idx];
+ return cp;
+ }
+ }
+ cond_resched_rcu();
+ }
+
+ return NULL;
+}
+
+static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ struct ip_vs_iter_state *iter = seq->private;
+
+ iter->l = NULL;
+ rcu_read_lock();
+	return *pos ? ip_vs_conn_array(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ip_vs_conn *cp = v;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct hlist_node *e;
+ struct hlist_head *l = iter->l;
+ int idx;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return ip_vs_conn_array(seq, 0);
+
+ /* more on same hash chain? */
+ e = rcu_dereference(hlist_next_rcu(&cp->c_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_conn, c_list);
+
+ idx = l - ip_vs_conn_tab;
+ while (++idx < ip_vs_conn_tab_size) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ iter->l = &ip_vs_conn_tab[idx];
+ return cp;
+ }
+ cond_resched_rcu();
+ }
+ iter->l = NULL;
+ return NULL;
+}
+
+static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq,
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
+ else {
+ const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
+ char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
+ size_t len = 0;
+ char dbuf[IP_VS_ADDRSTRLEN];
+
+ if (!net_eq(cp->ipvs->net, net))
+ return 0;
+ if (cp->pe_data) {
+ pe_data[0] = ' ';
+ len = strlen(cp->pe->name);
+ memcpy(pe_data + 1, cp->pe->name, len);
+ pe_data[len + 1] = ' ';
+ len += 2;
+ len += cp->pe->show_pe_data(cp, pe_data + len);
+ }
+ pe_data[len] = '\0';
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %7u%s\n",
+ ip_vs_proto_name(cp->protocol),
+ &cp->caddr.in6, ntohs(cp->cport),
+ &cp->vaddr.in6, ntohs(cp->vport),
+ dbuf, ntohs(cp->dport),
+ ip_vs_state_name(cp),
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000,
+ pe_data);
+ else
+#endif
+ seq_printf(seq,
+ "%-3s %08X %04X %08X %04X"
+ " %s %04X %-11s %7u%s\n",
+ ip_vs_proto_name(cp->protocol),
+ ntohl(cp->caddr.ip), ntohs(cp->cport),
+ ntohl(cp->vaddr.ip), ntohs(cp->vport),
+ dbuf, ntohs(cp->dport),
+ ip_vs_state_name(cp),
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000,
+ pe_data);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_conn_seq_ops = {
+ .start = ip_vs_conn_seq_start,
+ .next = ip_vs_conn_seq_next,
+ .stop = ip_vs_conn_seq_stop,
+ .show = ip_vs_conn_seq_show,
+};
+
+static const char *ip_vs_origin_name(unsigned int flags)
+{
+ if (flags & IP_VS_CONN_F_SYNC)
+ return "SYNC";
+ else
+ return "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+ char dbuf[IP_VS_ADDRSTRLEN];
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq,
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
+ else {
+ const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
+
+ if (!net_eq(cp->ipvs->net, net))
+ return 0;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7u\n",
+ ip_vs_proto_name(cp->protocol),
+ &cp->caddr.in6, ntohs(cp->cport),
+ &cp->vaddr.in6, ntohs(cp->vport),
+ dbuf, ntohs(cp->dport),
+ ip_vs_state_name(cp),
+ ip_vs_origin_name(cp->flags),
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000);
+ else
+#endif
+ seq_printf(seq,
+ "%-3s %08X %04X %08X %04X "
+ "%s %04X %-11s %-6s %7u\n",
+ ip_vs_proto_name(cp->protocol),
+ ntohl(cp->caddr.ip), ntohs(cp->cport),
+ ntohl(cp->vaddr.ip), ntohs(cp->vport),
+ dbuf, ntohs(cp->dport),
+ ip_vs_state_name(cp),
+ ip_vs_origin_name(cp->flags),
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+ .start = ip_vs_conn_seq_start,
+ .next = ip_vs_conn_seq_next,
+ .stop = ip_vs_conn_seq_stop,
+ .show = ip_vs_conn_sync_seq_show,
+};
+#endif
+
+
+/* Randomly drop connection entries before running out of memory
+ * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
+ * - traffic for services in OPS mode increases ct->in_pkts, so it is supported
+ * - traffic for services not in OPS mode does not increase ct->in_pkts in
+ * all cases, so it is not supported
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+ /*
+ * The drop rate array needs tuning for real environments.
+ * Called from timer bh only => no locking
+ */
+ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static signed char todrop_counter[9] = {0};
+ int i;
+
+	/* If the conn entry hasn't lasted for 60 seconds, don't drop it.
+	   This leaves enough time for a normal connection to get
+	   through. */
+ if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
+ return 0;
+
+	/* Don't drop the entry if its number of incoming packets is not
+	   in the range [0, 8] */
+	i = atomic_read(&cp->in_pkts);
+	if (i > 8 || i < 0)
+		return 0;
+
+	if (!todrop_rate[i])
+		return 0;
+	if (--todrop_counter[i] > 0)
+		return 0;
+
+ todrop_counter[i] = todrop_rate[i];
+ return 1;
+}
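
With todrop_rate[i] == i the limiter is inversely proportional to the
traffic an entry has seen: an entry with one incoming packet is dropped at
every opportunity, one with eight packets only on every eighth. A
compressed model of the counter scheme (like the original, deliberately not
thread-safe, since it runs from timer bottom half only):

    /* Return 1 on every rate-th call for a given bucket. */
    static int rate_limited_drop(signed char *counter, signed char rate)
    {
            if (!rate)
                    return 0;
            if (--(*counter) > 0)
                    return 0;
            *counter = rate;
            return 1;
    }
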
+
+static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
+{
+ struct ip_vs_service *svc;
+
+ if (!cp->dest)
+ return false;
+ svc = rcu_dereference(cp->dest->svc);
+ return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET);
+}
+
+/* Called from keventd and must protect itself from softirqs */
+void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
+{
+ int idx;
+ struct ip_vs_conn *cp;
+
+ rcu_read_lock();
+ /*
+ * Randomly scan 1/32 of the whole table every second
+ */
+ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
+ unsigned int hash = get_random_u32() & ip_vs_conn_tab_mask;
+
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (cp->ipvs != ipvs)
+ continue;
+ if (atomic_read(&cp->n_control))
+ continue;
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ /* connection template of OPS */
+ if (ip_vs_conn_ops_mode(cp))
+ goto try_drop;
+ if (!(cp->state & IP_VS_CTPL_S_ASSURED))
+ goto drop;
+ continue;
+ }
+ if (cp->protocol == IPPROTO_TCP) {
+ switch(cp->state) {
+ case IP_VS_TCP_S_SYN_RECV:
+ case IP_VS_TCP_S_SYNACK:
+ break;
+
+ case IP_VS_TCP_S_ESTABLISHED:
+ if (todrop_entry(cp))
+ break;
+ continue;
+
+ default:
+ continue;
+ }
+ } else if (cp->protocol == IPPROTO_SCTP) {
+ switch (cp->state) {
+ case IP_VS_SCTP_S_INIT1:
+ case IP_VS_SCTP_S_INIT:
+ break;
+ case IP_VS_SCTP_S_ESTABLISHED:
+ if (todrop_entry(cp))
+ break;
+ continue;
+ default:
+ continue;
+ }
+ } else {
+try_drop:
+ if (!todrop_entry(cp))
+ continue;
+ }
+
+drop:
+ IP_VS_DBG(4, "drop connection\n");
+ ip_vs_conn_del(cp);
+ }
+ cond_resched_rcu();
+ }
+ rcu_read_unlock();
+}
+
+
+/*
+ * Flush all the connection entries in the ip_vs_conn_tab
+ */
+static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
+{
+ int idx;
+ struct ip_vs_conn *cp, *cp_c;
+
+flush_again:
+ rcu_read_lock();
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ if (cp->ipvs != ipvs)
+ continue;
+ if (atomic_read(&cp->n_control))
+ continue;
+ cp_c = cp->control;
+ IP_VS_DBG(4, "del connection\n");
+ ip_vs_conn_del(cp);
+ if (cp_c && !atomic_read(&cp_c->n_control)) {
+ IP_VS_DBG(4, "del controlling connection\n");
+ ip_vs_conn_del(cp_c);
+ }
+ }
+ cond_resched_rcu();
+ }
+ rcu_read_unlock();
+
+	/* the count may be non-zero, because some conn entries may still
+	   be handled by the slow timer or be unhashed but still referenced */
+ if (atomic_read(&ipvs->conn_count) != 0) {
+ schedule();
+ goto flush_again;
+ }
+}
+
+#ifdef CONFIG_SYSCTL
+void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
+{
+ int idx;
+ struct ip_vs_conn *cp, *cp_c;
+ struct ip_vs_dest *dest;
+
+ rcu_read_lock();
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ if (cp->ipvs != ipvs)
+ continue;
+
+ dest = cp->dest;
+ if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE))
+ continue;
+
+ if (atomic_read(&cp->n_control))
+ continue;
+
+ cp_c = cp->control;
+ IP_VS_DBG(4, "del connection\n");
+ ip_vs_conn_del(cp);
+ if (cp_c && !atomic_read(&cp_c->n_control)) {
+ IP_VS_DBG(4, "del controlling connection\n");
+ ip_vs_conn_del(cp_c);
+ }
+ }
+ cond_resched_rcu();
+
+		/* netns cleanup started, abort delayed work */
+ if (!ipvs->enable)
+ break;
+ }
+ rcu_read_unlock();
+}
+#endif
+
+/*
+ * per netns init and exit
+ */
+int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
+{
+ atomic_set(&ipvs->conn_count, 0);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+ &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn;
+
+ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn_sync;
+#endif
+
+ return 0;
+
+#ifdef CONFIG_PROC_FS
+err_conn_sync:
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+err_conn:
+ return -ENOMEM;
+#endif
+}
+
+void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
+{
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(ipvs);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
+#endif
+}
+
+int __init ip_vs_conn_init(void)
+{
+ int idx;
+
+ /* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 27) {
+ pr_info("conn_tab_bits not in [8, 27]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
+ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
+ ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
+
+ /*
+ * Allocate the connection hash table and initialize its list heads
+ */
+ ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
+ sizeof(*ip_vs_conn_tab)));
+ if (!ip_vs_conn_tab)
+ return -ENOMEM;
+
+ /* Allocate ip_vs_conn slab cache */
+ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+ sizeof(struct ip_vs_conn), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!ip_vs_conn_cachep) {
+ vfree(ip_vs_conn_tab);
+ return -ENOMEM;
+ }
+
+ pr_info("Connection hash table configured "
+ "(size=%d, memory=%ldKbytes)\n",
+ ip_vs_conn_tab_size,
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
+ IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
+ sizeof(struct ip_vs_conn));
+
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
+ INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
+
+ for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
+ spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ }
+
+ /* calculate the random value for connection hash */
+ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+
+ return 0;
+}
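
The table size is kept a power of two so the bucket index is a cheap mask,
hash & (size - 1), rather than a modulo. For example, with 12 bits (a
common CONFIG_IP_VS_TAB_BITS default; the value is configurable) the
numbers reported by the pr_info above come out as follows (standalone
sketch, assuming 8-byte hlist_head pointers on 64-bit):

    #include <stdio.h>

    int main(void)
    {
            int bits = 12;                  /* illustrative value */
            unsigned int size = 1u << bits; /* 4096 buckets */
            unsigned int mask = size - 1;   /* 0xfff */

            printf("size=%u mask=%#x memory=%uKbytes\n",
                   size, mask, size * 8 / 1024);
            return 0;
    }
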
+
+void ip_vs_conn_cleanup(void)
+{
+	/* Wait for all ip_vs_conn_rcu_free() callbacks to complete */
+ rcu_barrier();
+ /* Release the empty cache */
+ kmem_cache_destroy(ip_vs_conn_cachep);
+ vfree(ip_vs_conn_tab);
+}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
new file mode 100644
index 000000000..b5ae41966
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -0,0 +1,2456 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ * Paul `Rusty' Russell properly handle non-linear skbs
+ * Harald Welte don't use nfcache
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/icmp.h>
+#include <linux/slab.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h> /* for icmp_send */
+#include <net/gue.h>
+#include <net/gre.h>
+#include <net/route.h>
+#include <net/ip6_checksum.h>
+#include <net/netns/generic.h> /* net_generic() */
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/ip6_route.h>
+#endif
+
+#include <net/ip_vs.h>
+#include <linux/indirect_call_wrapper.h>
+
+
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
+#endif
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+EXPORT_SYMBOL(ip_vs_new_conn_out);
+
+#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) \
+ INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_TCP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__)
+#else
+#define SNAT_CALL(f, ...) f(__VA_ARGS__)
+#endif
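
INDIRECT_CALL_n() from linux/indirect_call_wrapper.h mitigates the cost of
retpolined indirect branches: the function pointer is compared against the
expected targets and, on a match, the target is called directly so the
compiler emits a plain direct call. Roughly (a simplified model of the
expansion, not the exact kernel macro):

    #define MY_INDIRECT_CALL_2(f, f2, f1, ...)              \
            ((f) == (f2) ? f2(__VA_ARGS__) :                \
             (f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))
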
+
+static unsigned int ip_vs_net_id __read_mostly;
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
+
+/* ID used in ICMP lookups */
+#define icmp_id(icmph) (((icmph)->un).echo.id)
+#define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier)
+
+const char *ip_vs_proto_name(unsigned int proto)
+{
+ static char buf[20];
+
+ switch (proto) {
+ case IPPROTO_IP:
+ return "IP";
+ case IPPROTO_UDP:
+ return "UDP";
+ case IPPROTO_TCP:
+ return "TCP";
+ case IPPROTO_SCTP:
+ return "SCTP";
+ case IPPROTO_ICMP:
+ return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+ case IPPROTO_ICMPV6:
+ return "ICMPv6";
+#endif
+ default:
+ sprintf(buf, "IP_%u", proto);
+ return buf;
+ }
+}
+
+void ip_vs_init_hash_table(struct list_head *table, int rows)
+{
+ while (--rows >= 0)
+ INIT_LIST_HEAD(&table[rows]);
+}
+
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+ struct ip_vs_dest *dest = cp->dest;
+ struct netns_ipvs *ipvs = cp->ipvs;
+
+ if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
+
+ local_bh_disable();
+
+ s = this_cpu_ptr(dest->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.inpkts);
+ u64_stats_add(&s->cnt.inbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.inpkts);
+ u64_stats_add(&s->cnt.inbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.inpkts);
+ u64_stats_add(&s->cnt.inbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
+ }
+}
+
+
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+ struct ip_vs_dest *dest = cp->dest;
+ struct netns_ipvs *ipvs = cp->ipvs;
+
+ if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
+
+ local_bh_disable();
+
+ s = this_cpu_ptr(dest->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.outpkts);
+ u64_stats_add(&s->cnt.outbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.outpkts);
+ u64_stats_add(&s->cnt.outbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.outpkts);
+ u64_stats_add(&s->cnt.outbytes, skb->len);
+ u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
+ }
+}
+
+
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct ip_vs_cpu_stats *s;
+
+ local_bh_disable();
+
+ s = this_cpu_ptr(cp->dest->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.conns);
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(svc->stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.conns);
+ u64_stats_update_end(&s->syncp);
+
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+ u64_stats_update_begin(&s->syncp);
+ u64_stats_inc(&s->cnt.conns);
+ u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
+}
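
All three stats helpers follow one pattern: disable bottom halves, fetch
the per-CPU counter block for the dest, its service and the netns totals
with this_cpu_ptr(), and bracket each update with
u64_stats_update_begin()/end() so 32-bit readers can retry until they see a
consistent 64-bit value. A single-counter model of the write side
(simplified ordering; the real u64_stats_sync compiles away on 64-bit):

    #include <stdint.h>

    struct stats64 {
            unsigned int seq;       /* odd while a write is in progress */
            uint64_t pkts;
            uint64_t bytes;
    };

    static void stats_add(struct stats64 *s, uint64_t len)
    {
            __atomic_fetch_add(&s->seq, 1, __ATOMIC_SEQ_CST);  /* begin */
            s->pkts++;
            s->bytes += len;
            __atomic_fetch_add(&s->seq, 1, __ATOMIC_SEQ_CST);  /* end */
    }
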
+
+
+static inline void
+ip_vs_set_state(struct ip_vs_conn *cp, int direction,
+ const struct sk_buff *skb,
+ struct ip_vs_proto_data *pd)
+{
+ if (likely(pd->pp->state_transition))
+ pd->pp->state_transition(cp, direction, skb, pd);
+}
+
+static inline int
+ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
+ struct sk_buff *skb, int protocol,
+ const union nf_inet_addr *caddr, __be16 cport,
+ const union nf_inet_addr *vaddr, __be16 vport,
+ struct ip_vs_conn_param *p)
+{
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr,
+ vport, p);
+ p->pe = rcu_dereference(svc->pe);
+ if (p->pe && p->pe->fill_param)
+ return p->pe->fill_param(p, skb);
+
+ return 0;
+}
+
+/*
+ * IPVS persistent scheduling function
+ * It creates a connection entry according to its template if exists,
+ * or selects a server and creates a connection entry plus a template.
+ * Locking: we are svc user (svc->refcnt), so we hold all dests too
+ * Protocols supported: TCP, UDP
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc,
+ struct sk_buff *skb, __be16 src_port, __be16 dst_port,
+ int *ignored, struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_conn *cp = NULL;
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *ct;
+ __be16 dport = 0; /* destination port to forward */
+ unsigned int flags;
+ struct ip_vs_conn_param param;
+ const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
+ union nf_inet_addr snet; /* source network of the client,
+ after masking */
+ const union nf_inet_addr *src_addr, *dst_addr;
+
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ src_addr = &iph->saddr;
+ dst_addr = &iph->daddr;
+ } else {
+ src_addr = &iph->daddr;
+ dst_addr = &iph->saddr;
+ }
+
+
+ /* Mask saddr with the netmask to adjust template granularity */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &src_addr->in6,
+ (__force __u32) svc->netmask);
+ else
+#endif
+ snet.ip = src_addr->ip & svc->netmask;
+
+ IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+ "mnet %s\n",
+ IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
+ IP_VS_DBG_ADDR(svc->af, &snet));
+
+ /*
+	 * FTP is a complicated protocol: it uses a control connection and
+	 * separate data connections. For active FTP, the FTP server initiates
+	 * the data connection to the client, with source port often 20. For
+	 * passive FTP, the FTP server tells the client the port on which it
+	 * passively listens, and the client issues the data connection. In
+	 * the tunneling or direct routing mode, the load balancer is on the
+	 * client-to-server half of the connection, so the port number is
+	 * unknown to the load balancer. So, a conn template like
+	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+	 * is created for other persistent services.
+ */
+ {
+ int protocol = iph->protocol;
+ const union nf_inet_addr *vaddr = dst_addr;
+ __be16 vport = 0;
+
+ if (dst_port == svc->port) {
+ /* non-FTP template:
+ * <protocol, caddr, 0, vaddr, vport, daddr, dport>
+ * FTP template:
+ * <protocol, caddr, 0, vaddr, 0, daddr, 0>
+ */
+ if (svc->port != FTPPORT)
+ vport = dst_port;
+ } else {
+ /* Note: persistent fwmark-based services and
+ * persistent port zero service are handled here.
+ * fwmark template:
+ * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+ * port zero template:
+ * <protocol,caddr,0,vaddr,0,daddr,0>
+ */
+ if (svc->fwmark) {
+ protocol = IPPROTO_IP;
+ vaddr = &fwmark;
+ }
+ }
+ /* return *ignored = -1 so NF_DROP can be used */
+ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+ vaddr, vport, &param) < 0) {
+ *ignored = -1;
+ return NULL;
+ }
+ }
+
+ /* Check if a template already exists */
+ ct = ip_vs_ct_in_get(&param);
+ if (!ct || !ip_vs_check_template(ct, NULL)) {
+ struct ip_vs_scheduler *sched;
+
+ /*
+ * No template found or the dest of the connection
+ * template is not available.
+ * return *ignored=0 i.e. ICMP and NF_DROP
+ */
+ sched = rcu_dereference(svc->scheduler);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
+ if (!dest) {
+ IP_VS_DBG(1, "p-schedule: no dest found.\n");
+ kfree(param.pe_data);
+ *ignored = 0;
+ return NULL;
+ }
+
+ if (dst_port == svc->port && svc->port != FTPPORT)
+ dport = dest->port;
+
+ /* Create a template
+ * This adds param.pe_data to the template,
+ * and thus param.pe_data will be destroyed
+ * when the template expires */
+ ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
+ IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
+ if (ct == NULL) {
+ kfree(param.pe_data);
+ *ignored = -1;
+ return NULL;
+ }
+
+ ct->timeout = svc->timeout;
+ } else {
+ /* set destination with the found template */
+ dest = ct->dest;
+ kfree(param.pe_data);
+ }
+
+ dport = dst_port;
+ if (dport == svc->port && dest->port)
+ dport = dest->port;
+
+ flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+ && iph->protocol == IPPROTO_UDP) ?
+ IP_VS_CONN_F_ONE_PACKET : 0;
+
+ /*
+ * Create a new connection according to the template
+ */
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr,
+ src_port, dst_addr, dst_port, &param);
+
+ cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
+ skb->mark);
+ if (cp == NULL) {
+ ip_vs_conn_put(ct);
+ *ignored = -1;
+ return NULL;
+ }
+
+ /*
+ * Add its control
+ */
+ ip_vs_control_add(cp, ct);
+ ip_vs_conn_put(ct);
+
+ ip_vs_conn_stats(cp, svc);
+ return cp;
+}
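
The netmask step above is what gives persistence its granularity: all
clients within one masked network share a single template and therefore
land on the same real server. A standalone illustration with hypothetical
addresses, where a /24 netmask maps every client in 192.168.1.0/24 onto the
same template:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t saddr = inet_addr("192.168.1.7");      /* example client */
            uint32_t netmask = inet_addr("255.255.255.0");  /* svc->netmask */
            struct in_addr snet = { .s_addr = saddr & netmask };

            printf("template source network: %s\n", inet_ntoa(snet));
            return 0;
    }
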
+
+
+/*
+ * IPVS main scheduling function
+ * It selects a server according to the virtual service, and
+ * creates a connection entry.
+ * Protocols supported: TCP, UDP
+ *
+ * Usage of *ignored
+ *
+ * 1 : protocol tried to schedule (eg. on SYN), found svc but the
+ * svc/scheduler decides that this packet should be accepted with
+ * NF_ACCEPT because it must not be scheduled.
+ *
+ *  0 : scheduler cannot find a destination, so try bypass or
+ *      return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 : scheduler tried to schedule but a fatal error occurred, e.g.
+ * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ * failure such as missing Call-ID, ENOMEM on skb_linearize
+ * or pe_data. In this case we should return NF_DROP without
+ * any attempts to send ICMP with ip_vs_leave.
+ */
+struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd, int *ignored,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+ struct ip_vs_conn *cp = NULL;
+ struct ip_vs_scheduler *sched;
+ struct ip_vs_dest *dest;
+ __be16 _ports[2], *pptr, cport, vport;
+ const void *caddr, *vaddr;
+ unsigned int flags;
+
+ *ignored = 1;
+ /*
+	 *    For IPv6 fragments, only the first fragment gets here.
+ */
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return NULL;
+
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ cport = pptr[0];
+ caddr = &iph->saddr;
+ vport = pptr[1];
+ vaddr = &iph->daddr;
+ } else {
+ cport = pptr[1];
+ caddr = &iph->daddr;
+ vport = pptr[0];
+ vaddr = &iph->saddr;
+ }
+
+ /*
+ * FTPDATA needs this check when using local real server.
+ * Never schedule Active FTPDATA connections from real server.
+ * For LVS-NAT they must be already created. For other methods
+ * with persistence the connection is created on SYN+ACK.
+ */
+ if (cport == FTPDATA) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+ "Not scheduling FTPDATA");
+ return NULL;
+ }
+
+ /*
+ * Do not schedule replies from local real server.
+ */
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+ cp = INDIRECT_CALL_1(pp->conn_in_get,
+ ip_vs_conn_in_get_proto, svc->ipvs,
+ svc->af, skb, iph);
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+
+ if (cp) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+ "Not scheduling reply for existing"
+ " connection");
+ __ip_vs_conn_put(cp);
+ return NULL;
+ }
+ }
+
+ /*
+ * Persistent service
+ */
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
+ iph);
+
+ *ignored = 0;
+
+ /*
+ * Non-persistent service
+ */
+ if (!svc->fwmark && vport != svc->port) {
+ if (!svc->port)
+ pr_err("Schedule: port zero only supported "
+ "in persistent services, "
+ "check your ipvs configuration\n");
+ return NULL;
+ }
+
+ sched = rcu_dereference(svc->scheduler);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
+ if (dest == NULL) {
+ IP_VS_DBG(1, "Schedule: no dest found.\n");
+ return NULL;
+ }
+
+ flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+ && iph->protocol == IPPROTO_UDP) ?
+ IP_VS_CONN_F_ONE_PACKET : 0;
+
+ /*
+ * Create a connection entry.
+ */
+ {
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &p);
+ cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
+ dest->port ? dest->port : vport,
+ flags, dest, skb->mark);
+ if (!cp) {
+ *ignored = -1;
+ return NULL;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+ "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+ ip_vs_fwd_tag(cp),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
+ cp->flags, refcount_read(&cp->refcnt));
+
+ ip_vs_conn_stats(cp, svc);
+ return cp;
+}
+
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
+ union nf_inet_addr *addr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
+#endif
+ return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
+}
+
+/*
+ * Pass or drop the packet.
+ * Called by ip_vs_in, when the virtual service is available but
+ * no destination is available for a new connection.
+ */
+int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
+{
+ __be16 _ports[2], *pptr, dport;
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct net *net = ipvs->net;
+
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
+ if (!pptr)
+ return NF_DROP;
+ dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
+
+	/* If it is a fwmark-based service, the cache_bypass sysctl is on
+	   and the destination is a non-local unicast address, then create
+	   a cache_bypass connection entry */
+ if (sysctl_cache_bypass(ipvs) && svc->fwmark &&
+ !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
+ ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
+ int ret;
+ struct ip_vs_conn *cp;
+ unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
+ iph->protocol == IPPROTO_UDP) ?
+ IP_VS_CONN_F_ONE_PACKET : 0;
+ union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
+
+ /* create a new connection entry */
+ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
+ {
+ struct ip_vs_conn_param p;
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1], &p);
+ cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
+ IP_VS_CONN_F_BYPASS | flags,
+ NULL, skb->mark);
+ if (!cp)
+ return NF_DROP;
+ }
+
+ /* statistics */
+ ip_vs_in_stats(cp, skb);
+
+ /* set state */
+ ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
+
+ /* transmit the first SYN packet */
+ ret = cp->packet_xmit(skb, cp, pd->pp, iph);
+ /* do not touch skb anymore */
+
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ atomic_inc(&cp->control->in_pkts);
+ else
+ atomic_inc(&cp->in_pkts);
+ ip_vs_conn_put(cp);
+ return ret;
+ }
+
+ /*
+	 * When a virtual FTP service is present, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table). Pass these packets on, because it
+	 * is not IPVS's job to decide to drop them.
+ */
+ if (svc->port == FTPPORT && dport != FTPPORT)
+ return NF_ACCEPT;
+
+ if (unlikely(ip_vs_iph_icmp(iph)))
+ return NF_DROP;
+
+ /*
+ * Notify the client that the destination is unreachable, and
+ * release the socket buffer.
+	 * Since we are in the IP layer, the TCP socket is not actually
+	 * created and a TCP RST packet cannot be sent; instead,
+	 * ICMP_PORT_UNREACH is sent here whether it is TCP or UDP. --WZ
+ */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+ } else
+#endif
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+ return NF_DROP;
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_snat_reroute;
+}
+
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_nat_icmp_send;
+}
+
+#else
+
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
+
+#endif
+
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+{
+ return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+}
+
+static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
+{
+ if (NF_INET_LOCAL_IN == hooknum)
+ return IP_DEFRAG_VS_IN;
+ if (NF_INET_FORWARD == hooknum)
+ return IP_DEFRAG_VS_FWD;
+ return IP_DEFRAG_VS_OUT;
+}
+
+static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs,
+ struct sk_buff *skb, u_int32_t user)
+{
+ int err;
+
+ local_bh_disable();
+ err = ip_defrag(ipvs->net, skb, user);
+ local_bh_enable();
+ if (!err)
+ ip_send_check(ip_hdr(skb));
+
+ return err;
+}
+
+static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
+ struct sk_buff *skb, unsigned int hooknum)
+{
+ if (!sysctl_snat_reroute(ipvs))
+ return 0;
+ /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
+ if (NF_INET_LOCAL_IN == hooknum)
+ return 0;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
+ ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
+ return 1;
+ } else
+#endif
+ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Packet has been made sufficiently writable in caller
+ * - inout: 1=in->out, 0=out->in
+ */
+void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int inout)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int icmp_offset = iph->ihl*4;
+ struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
+ icmp_offset);
+ struct iphdr *ciph = (struct iphdr *)(icmph + 1);
+
+ if (inout) {
+ iph->saddr = cp->vaddr.ip;
+ ip_send_check(iph);
+ ciph->daddr = cp->vaddr.ip;
+ ip_send_check(ciph);
+ } else {
+ iph->daddr = cp->daddr.ip;
+ ip_send_check(iph);
+ ciph->saddr = cp->daddr.ip;
+ ip_send_check(ciph);
+ }
+
+ /* the TCP/UDP/SCTP port */
+ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
+ IPPROTO_SCTP == ciph->protocol) {
+ __be16 *ports = (void *)ciph + ciph->ihl*4;
+
+ if (inout)
+ ports[1] = cp->vport;
+ else
+ ports[0] = cp->dport;
+ }
+
+ /* And finally the ICMP checksum */
+ icmph->checksum = 0;
+ icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (inout)
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
+ "Forwarding altered outgoing ICMP");
+ else
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
+ "Forwarding altered incoming ICMP");
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int inout)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int icmp_offset = 0;
+ unsigned int offs = 0; /* header offset*/
+ int protocol;
+ struct icmp6hdr *icmph;
+ struct ipv6hdr *ciph;
+ unsigned short fragoffs;
+
+ ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
+ icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
+ offs = icmp_offset + sizeof(struct icmp6hdr);
+ ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
+
+ protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
+
+ if (inout) {
+ iph->saddr = cp->vaddr.in6;
+ ciph->daddr = cp->vaddr.in6;
+ } else {
+ iph->daddr = cp->daddr.in6;
+ ciph->saddr = cp->daddr.in6;
+ }
+
+ /* the TCP/UDP/SCTP port */
+ if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+ IPPROTO_SCTP == protocol)) {
+ __be16 *ports = (void *)(skb_network_header(skb) + offs);
+
+ IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
+ ntohs(inout ? ports[1] : ports[0]),
+ ntohs(inout ? cp->vport : cp->dport));
+ if (inout)
+ ports[1] = cp->vport;
+ else
+ ports[0] = cp->dport;
+ }
+
+ /* And finally the ICMP checksum */
+ icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
+ skb->len - icmp_offset,
+ IPPROTO_ICMPV6, 0);
+ skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
+ skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (inout)
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
+ (void *)ciph - (void *)iph,
+ "Forwarding altered outgoing ICMPv6");
+ else
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
+ (void *)ciph - (void *)iph,
+ "Forwarding altered incoming ICMPv6");
+}
+#endif
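
Both NAT helpers rewrite the outer IP header and the embedded (quoted)
header in opposite directions: for a reply leaving the cluster the outer
source and the quoted destination both become the virtual address, while
for an error heading inside the outer destination and the quoted source
both become the real server. A compact model of the address mapping
(illustrative types, IPv4 only):

    #include <stdint.h>

    struct icmp_addrs {
            uint32_t outer_src, outer_dst;  /* IP header carrying the ICMP */
            uint32_t inner_src, inner_dst;  /* IP header quoted inside it */
    };

    static void nat_icmp_model(struct icmp_addrs *a, uint32_t vaddr,
                               uint32_t daddr, int inout)
    {
            if (inout) {                    /* in->out: toward the client */
                    a->outer_src = vaddr;   /* error appears to come from VIP */
                    a->inner_dst = vaddr;   /* quoted packet was sent to VIP */
            } else {                        /* out->in: toward the real server */
                    a->outer_dst = daddr;
                    a->inner_src = daddr;   /* quoted packet came from the RS */
            }
    }
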
+
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+ union nf_inet_addr *snet,
+ __u8 protocol, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp,
+ unsigned int offset, unsigned int ihl,
+ unsigned int hooknum)
+{
+ unsigned int verdict = NF_DROP;
+
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ goto after_nat;
+
+ /* Ensure the checksum is correct */
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+ /* Failed checksum! */
+ IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+ IP_VS_DBG_ADDR(af, snet));
+ goto out;
+ }
+
+ if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+ IPPROTO_SCTP == protocol)
+ offset += 2 * sizeof(__u16);
+ if (skb_ensure_writable(skb, offset))
+ goto out;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+ else
+#endif
+ ip_vs_nat_icmp(skb, pp, cp, 1);
+
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
+ goto out;
+
+after_nat:
+ /* do the statistics and put it back */
+ ip_vs_out_stats(cp, skb);
+
+ skb->ipvs_property = 1;
+ if (!(cp->flags & IP_VS_CONN_F_NFCT))
+ ip_vs_notrack(skb);
+ else
+ ip_vs_update_conntrack(skb, cp, 0);
+ verdict = NF_ACCEPT;
+
+out:
+ __ip_vs_conn_put(cp);
+
+ return verdict;
+}
+
+/*
+ * Handle ICMP messages in the inside-to-outside direction (outgoing).
+ * Find any that might be relevant, check against existing connections.
+ * Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum)
+{
+ struct iphdr *iph;
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset, ihl;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ip_is_fragment(ip_hdr(skb))) {
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
+ return NF_STOLEN;
+ }
+
+ iph = ip_hdr(skb);
+ offset = ihl = iph->ihl * 4;
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %pI4->%pI4\n",
+ ic->type, ntohs(icmp_id(ic)),
+ &iph->saddr, &iph->daddr);
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ pp = ip_vs_proto_get(cih->protocol);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* Is the embedded protocol header present? */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+ pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
+ "Checking outgoing ICMP for");
+
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
+
+ /* The embedded headers contain source and dest in reverse order */
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, AF_INET, skb, &ciph);
+ if (!cp)
+ return NF_ACCEPT;
+
+ snet.ip = iph->saddr;
+ return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+ pp, ciph.len, ihl, hooknum);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *ipvsh)
+{
+ struct icmp6hdr _icmph, *ic;
+ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ union nf_inet_addr snet;
+ unsigned int offset;
+
+ *related = 1;
+ ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+	/* A fragment header before the ICMP header tells us that
+	 * this is not an error message, since error messages can't be fragmented.
+ */
+ if (ipvsh->flags & IP6_FH_F_FRAG)
+ return NF_DROP;
+
+ IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &ipvsh->saddr, &ipvsh->daddr);
+
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
+ true, &ciph))
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ pp = ip_vs_proto_get(ciph.protocol);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* The embedded headers contain source and dest in reverse order */
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, AF_INET6, skb, &ciph);
+ if (!cp)
+ return NF_ACCEPT;
+
+ snet.in6 = ciph.saddr.in6;
+ offset = ciph.len;
+ return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
+ pp, offset, sizeof(struct ipv6hdr),
+ hooknum);
+}
+#endif
+
+/*
+ * Check if the SCTP chunk is an ABORT chunk
+ */
+static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
+{
+ struct sctp_chunkhdr *sch, schunk;
+ sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return 0;
+ if (sch->type == SCTP_CID_ABORT)
+ return 1;
+ return 0;
+}
+
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
+{
+ struct tcphdr _tcph, *th;
+
+ th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return 0;
+ return th->rst;
+}
+
+static inline bool is_new_conn(const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph, *th;
+
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return false;
+ return th->syn;
+ }
+ case IPPROTO_SCTP: {
+ struct sctp_chunkhdr *sch, schunk;
+
+ sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return false;
+ return sch->type == SCTP_CID_INIT;
+ }
+ default:
+ return false;
+ }
+}
+
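+/* Decide whether an existing connection in a terminal state may be
+ * replaced by a new one. Called only when conn_reuse_mode is non-zero:
+ * any non-zero mode allows rescheduling from TCP TIME_WAIT/CLOSE and
+ * SCTP CLOSED; bit 1 additionally allows it from TCP FIN_WAIT when the
+ * connection never produced output (as in DR/TUN forwarding).
+ */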
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ (cp->state == IP_VS_TCP_S_CLOSE) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
+
+/* Generic function to create new connections for outgoing RS packets
+ *
+ * Prerequisites for successful connection creation:
+ * 1) The Virtual Service is NOT fwmark based:
+ *    in a fwmark-based VS the actual vaddr and vport are unknown to IPVS
+ * 2) Neither the Real Server nor the Virtual Service was configured
+ *    without a port: this allows matching different VSs to the same RS IP address
+ */
+struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ struct ip_vs_conn_param param;
+ struct ip_vs_conn *ct = NULL, *cp = NULL;
+ const union nf_inet_addr *vaddr, *daddr, *caddr;
+ union nf_inet_addr snet;
+ __be16 vport;
+ unsigned int flags;
+
+ EnterFunction(12);
+ vaddr = &svc->addr;
+ vport = svc->port;
+ daddr = &iph->saddr;
+ caddr = &iph->daddr;
+
+ /* check pre-requisites are satisfied */
+ if (svc->fwmark)
+ return NULL;
+ if (!vport || !dport)
+ return NULL;
+
+ /* for persistent service first create connection template */
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
+ /* apply netmask the same way ingress-side does */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &caddr->in6,
+ (__force __u32)svc->netmask);
+ else
+#endif
+ snet.ip = caddr->ip & svc->netmask;
+ /* fill params and create template if not existent */
+ if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
+ &snet, 0, vaddr,
+ vport, &param) < 0)
+ return NULL;
+ ct = ip_vs_ct_in_get(&param);
+ /* check if template exists and points to the same dest */
+ if (!ct || !ip_vs_check_template(ct, dest)) {
+ ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
+ IP_VS_CONN_F_TEMPLATE, dest, 0);
+ if (!ct) {
+ kfree(param.pe_data);
+ return NULL;
+ }
+ ct->timeout = svc->timeout;
+ } else {
+ kfree(param.pe_data);
+ }
+ }
+
+ /* connection flags */
+ flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
+ iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
+ /* create connection */
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &param);
+ cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
+ if (!cp) {
+ if (ct)
+ ip_vs_conn_put(ct);
+ return NULL;
+ }
+ if (ct) {
+ ip_vs_control_add(cp, ct);
+ ip_vs_conn_put(ct);
+ }
+ ip_vs_conn_stats(cp, svc);
+
+ /* return connection (will be used to handle outgoing packet) */
+ IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
+ "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+ ip_vs_fwd_tag(cp),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ cp->flags, refcount_read(&cp->refcnt));
+ LeaveFunction(12);
+ return cp;
+}
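+
+/* A persistence engine can publish this helper as its conn_out callback
+ * so that packets initiated by real servers create connections; an
+ * illustrative (hypothetical) registration:
+ *
+ *	static struct ip_vs_pe my_pe = {
+ *		.name		= "my_pe",
+ *		.conn_out	= ip_vs_new_conn_out,
+ *	};
+ */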
+
+/* Handle outgoing packets which are considered requests initiated by
+ * real servers, so that subsequent responses from external client can be
+ * routed to the right real server.
+ * Used also for outgoing responses in OPS mode.
+ *
+ * Connection management is handled by persistent-engine specific callback.
+ */
+static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
+ struct netns_ipvs *ipvs,
+ int af, struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp = NULL;
+ __be16 _ports[2], *pptr;
+
+ if (hooknum == NF_INET_LOCAL_IN)
+ return NULL;
+
+ pptr = frag_safe_skb_hp(skb, iph->len,
+ sizeof(_ports), _ports);
+ if (!pptr)
+ return NULL;
+
+ dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
+ &iph->saddr, pptr[0]);
+ if (dest) {
+ struct ip_vs_service *svc;
+ struct ip_vs_pe *pe;
+
+ svc = rcu_dereference(dest->svc);
+ if (svc) {
+ pe = rcu_dereference(svc->pe);
+ if (pe && pe->conn_out)
+ cp = pe->conn_out(svc, dest, skb, iph,
+ pptr[0], pptr[1]);
+ }
+ }
+
+ return cp;
+}
+
+/* Handle response packets: rewrite addresses and send away...
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph,
+ unsigned int hooknum)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ goto after_nat;
+
+ IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
+
+ if (skb_ensure_writable(skb, iph->len))
+ goto drop;
+
+ /* mangle the packet */
+ if (pp->snat_handler &&
+ !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph))
+ goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+ else
+#endif
+ {
+ ip_hdr(skb)->saddr = cp->vaddr.ip;
+ ip_send_check(ip_hdr(skb));
+ }
+
+ /*
+ * nf_iterate does not expect change in the skb->dst->dev.
+ * It looks like it is not fatal to enable this code for hooks
+ * where our handlers are at the end of the chain list and
+ * when all next handlers use skb->dst->dev and not outdev.
+ * It will definitely route properly the inout NAT traffic
+ * when multiple paths are used.
+ */
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
+ goto drop;
+
+ IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
+
+after_nat:
+ ip_vs_out_stats(cp, skb);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
+ skb->ipvs_property = 1;
+ if (!(cp->flags & IP_VS_CONN_F_NFCT))
+ ip_vs_notrack(skb);
+ else
+ ip_vs_update_conntrack(skb, cp, 0);
+ ip_vs_conn_put(cp);
+
+ LeaveFunction(11);
+ return NF_ACCEPT;
+
+drop:
+ ip_vs_conn_put(cp);
+ kfree_skb(skb);
+ LeaveFunction(11);
+ return NF_STOLEN;
+}
+
+/*
+ *	Check if the outgoing packet belongs to an established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
+{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
+ struct ip_vs_iphdr iph;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
+ struct ip_vs_conn *cp;
+ int af = state->pf;
+ struct sock *sk;
+
+ EnterFunction(11);
+
+ /* Already marked as IPVS request or reply? */
+ if (skb->ipvs_property)
+ return NF_ACCEPT;
+
+ sk = skb_to_full_sk(skb);
+ /* Bad... Do not break raw sockets */
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
+ af == AF_INET)) {
+
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ return NF_ACCEPT;
+ }
+
+ if (unlikely(!skb_dst(skb)))
+ return NF_ACCEPT;
+
+ if (!ipvs->enable)
+ return NF_ACCEPT;
+
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+ int related;
+ int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
+ hooknum, &iph);
+
+ if (related)
+ return verdict;
+ }
+ } else
+#endif
+ if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+ int related;
+ int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
+
+ if (related)
+ return verdict;
+ }
+
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+ if (unlikely(!pd))
+ return NF_ACCEPT;
+ pp = pd->pp;
+
+ /* reassemble IP fragments */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET)
+#endif
+ if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
+ if (ip_vs_gather_frags(ipvs, skb,
+ ip_vs_defrag_user(hooknum)))
+ return NF_STOLEN;
+
+ ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
+ }
+
+ /*
+ * Check if the packet belongs to an existing entry
+ */
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, af, skb, &iph);
+
+ if (likely(cp))
+ return handle_response(af, skb, pd, cp, &iph, hooknum);
+
+ /* Check for real-server-started requests */
+ if (atomic_read(&ipvs->conn_out_counter)) {
+ /* Currently only for UDP:
+		 * connection-oriented protocols typically use
+ * ephemeral ports for outgoing connections, so
+ * related incoming responses would not match any VS
+ */
+ if (pp->protocol == IPPROTO_UDP) {
+ cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
+ if (likely(cp))
+ return handle_response(af, skb, pd, cp, &iph,
+ hooknum);
+ }
+ }
+
+ if (sysctl_nat_icmp_send(ipvs) &&
+ (pp->protocol == IPPROTO_TCP ||
+ pp->protocol == IPPROTO_UDP ||
+ pp->protocol == IPPROTO_SCTP)) {
+ __be16 _ports[2], *pptr;
+
+ pptr = frag_safe_skb_hp(skb, iph.len,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return NF_ACCEPT; /* Not for me */
+ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
+ pptr[0])) {
+			/*
+			 * Notify the real server that there is no
+			 * existing entry, unless the packet is a
+			 * TCP RST or an SCTP ABORT.
+			 */
+ if ((iph.protocol != IPPROTO_TCP &&
+ iph.protocol != IPPROTO_SCTP)
+ || ((iph.protocol == IPPROTO_TCP
+ && !is_tcp_reset(skb, iph.len))
+ || (iph.protocol == IPPROTO_SCTP
+ && !is_sctp_abort(skb,
+ iph.len)))) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (!skb->dev)
+ skb->dev = ipvs->net->loopback_dev;
+ icmpv6_send(skb,
+ ICMPV6_DEST_UNREACH,
+ ICMPV6_PORT_UNREACH,
+ 0);
+ } else
+#endif
+ icmp_send(skb,
+ ICMP_DEST_UNREACH,
+ ICMP_PORT_UNREACH, 0);
+ return NF_DROP;
+ }
+ }
+ }
+
+ IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
+ "ip_vs_out: packet continues traversal as normal");
+ return NF_ACCEPT;
+}
+
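+/* Try to schedule a new connection. Returns 1 with *cpp set when a
+ * connection exists or was created; returns 0 when the caller must
+ * return *verdict instead.
+ */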
+static unsigned int
+ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+
+ if (!iph->fragoffs) {
+		/* No (second) fragments need to enter here, as the fragment
+		 * zero replayed by nf_defrag_ipv6 will already have created the cp
+ */
+
+ /* Schedule and create new connection entry into cpp */
+ if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
+ return 0;
+ }
+
+ if (unlikely(!*cpp)) {
+ /* sorry, all this trouble for a no-hit :) */
+ IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
+ "ip_vs_in: packet continues traversal as normal");
+
+ /* Fragment couldn't be mapped to a conn entry */
+ if (iph->fragoffs)
+ IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
+ "unhandled fragment");
+
+ *verdict = NF_ACCEPT;
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Check the UDP tunnel and return its header length */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct udphdr _udph, *udph;
+ struct ip_vs_dest *dest;
+
+ udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (!udph)
+ goto unk;
+ offset += sizeof(struct udphdr);
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ struct guehdr _gueh, *gueh;
+
+ gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+ if (!gueh)
+ goto unk;
+ if (gueh->control != 0 || gueh->version != 0)
+ goto unk;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (gueh->proto_ctype != IPPROTO_IPIP)
+ goto unk;
+ *proto = gueh->proto_ctype;
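+		/* gueh->hlen counts the optional fields in 32-bit words,
+		 * hence the << 2 in the total tunnel header length below.
+		 */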
+ return sizeof(struct udphdr) + sizeof(struct guehdr) +
+ (gueh->hlen << 2);
+ }
+
+unk:
+ return 0;
+}
+
+/* Check the GRE tunnel and return its header length */
+static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct gre_base_hdr _greh, *greh;
+ struct ip_vs_dest *dest;
+
+ greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh);
+ if (!greh)
+ goto unk;
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, 0);
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 type;
+
+		/* Only support version 0, optionally with the C (csum) flag */
+ if ((greh->flags & ~GRE_CSUM) != 0)
+ goto unk;
+ type = greh->protocol;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (type != htons(ETH_P_IP))
+ goto unk;
+ *proto = IPPROTO_IPIP;
+ return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+ }
+
+unk:
+ return 0;
+}
+
+/*
+ * Handle ICMP messages in the outside-to-inside direction (incoming).
+ * Find any that might be relevant, check against existing connections,
+ * forward to the right destination host if relevant.
+ * Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int
+ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
+ unsigned int hooknum)
+{
+ struct iphdr *iph;
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
+ unsigned int offset, offset2, ihl, verdict;
+ bool tunnel, new_cp = false;
+ union nf_inet_addr *raddr;
+ char *outer_proto = "IPIP";
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ip_is_fragment(ip_hdr(skb))) {
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
+ return NF_STOLEN;
+ }
+
+ iph = ip_hdr(skb);
+ offset = ihl = iph->ihl * 4;
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Incoming ICMP (%d,%d) %pI4->%pI4\n",
+ ic->type, ntohs(icmp_id(ic)),
+ &iph->saddr, &iph->daddr);
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ raddr = (union nf_inet_addr *)&cih->daddr;
+
+ /* Special case for errors for IPIP/UDP/GRE tunnel packets */
+ tunnel = false;
+ if (cih->protocol == IPPROTO_IPIP) {
+ struct ip_vs_dest *dest;
+
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ /* Error for our IPIP must arrive at LOCAL_IN */
+ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
+ return NF_ACCEPT;
+ dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+ /* Only for known tunnel */
+ if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+ return NF_ACCEPT;
+ offset += cih->ihl * 4;
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ tunnel = true;
+ } else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */
+ cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */
+ /* Error for our tunnel must arrive at LOCAL_IN */
+ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+ __u8 iproto;
+ int ulen;
+
+ /* Non-first fragment has no UDP/GRE header */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ offset2 = offset + cih->ihl * 4;
+ if (cih->protocol == IPPROTO_UDP) {
+ ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ outer_proto = "UDP";
+ } else {
+ ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ outer_proto = "GRE";
+ }
+ if (ulen > 0) {
+ /* Skip IP and UDP/GRE tunnel headers */
+ offset = offset2 + ulen;
+ /* Now we should be at the original IP header */
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+ &_ciph);
+ if (cih && cih->version == 4 && cih->ihl >= 5 &&
+ iproto == IPPROTO_IPIP)
+ tunnel = true;
+ else
+ return NF_ACCEPT;
+ }
+ }
+
+ pd = ip_vs_proto_data_get(ipvs, cih->protocol);
+ if (!pd)
+ return NF_ACCEPT;
+ pp = pd->pp;
+
+ /* Is the embedded protocol header present? */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+ pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
+ "Checking incoming ICMP for");
+
+ offset2 = offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !tunnel, &ciph);
+ offset = ciph.len;
+
+ /* The embedded headers contain source and dest in reverse order.
+	 * For IPIP/UDP/GRE tunnels this is an error for a request, not for a reply.
+ */
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, AF_INET, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (tunnel || !sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
+ return v;
+ new_cp = true;
+ }
+
+ verdict = NF_DROP;
+
+ /* Ensure the checksum is correct */
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+ /* Failed checksum! */
+ IP_VS_DBG(1, "Incoming ICMP: failed checksum from %pI4!\n",
+ &iph->saddr);
+ goto out;
+ }
+
+ if (tunnel) {
+ __be32 info = ic->un.gateway;
+ __u8 type = ic->type;
+ __u8 code = ic->code;
+
+ /* Update the MTU */
+ if (ic->type == ICMP_DEST_UNREACH &&
+ ic->code == ICMP_FRAG_NEEDED) {
+ struct ip_vs_dest *dest = cp->dest;
+ u32 mtu = ntohs(ic->un.frag.mtu);
+ __be16 frag_off = cih->frag_off;
+
+ /* Strip outer IP and ICMP, go to IPIP/UDP/GRE header */
+ if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
+ goto ignore_tunnel;
+ offset2 -= ihl + sizeof(_icmph);
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "ICMP for %s %pI4->%pI4: mtu=%u\n",
+ outer_proto, &ip_hdr(skb)->saddr,
+ &ip_hdr(skb)->daddr, mtu);
+ ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0);
+ /* Client uses PMTUD? */
+ if (!(frag_off & htons(IP_DF)))
+ goto ignore_tunnel;
+ /* Prefer the resulting PMTU */
+ if (dest) {
+ struct ip_vs_dest_dst *dest_dst;
+
+ dest_dst = rcu_dereference(dest->dest_dst);
+ if (dest_dst)
+ mtu = dst_mtu(dest_dst->dst_cache);
+ }
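+			/* 68 is the minimum IPv4 MTU (RFC 791); report the
+			 * PMTU minus the outer IP header so the client can
+			 * size the inner packet.
+			 */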
+ if (mtu > 68 + sizeof(struct iphdr))
+ mtu -= sizeof(struct iphdr);
+ info = htonl(mtu);
+ }
+ /* Strip outer IP, ICMP and IPIP/UDP/GRE, go to IP header of
+ * original request.
+ */
+ if (pskb_pull(skb, offset2) == NULL)
+ goto ignore_tunnel;
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
+ &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
+ type, code, ntohl(info));
+ icmp_send(skb, type, code, info);
+		/* ICMP can be shorter, but account for it anyway */
+ ip_vs_out_stats(cp, skb);
+
+ignore_tunnel:
+ consume_skb(skb);
+ verdict = NF_STOLEN;
+ goto out;
+ }
+
+ /* do the statistics and put it back */
+ ip_vs_in_stats(cp, skb);
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol ||
+ IPPROTO_SCTP == cih->protocol)
+ offset += 2 * sizeof(__u16);
+ verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
+
+out:
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
+
+ return verdict;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
+{
+ struct icmp6hdr _icmph, *ic;
+ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
+ unsigned int offset, verdict;
+ bool new_cp = false;
+
+ *related = 1;
+
+ ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+	/* A fragment header before the ICMP header tells us that
+	 * this is not an error message, since error messages can't be fragmented.
+ */
+ if (iph->flags & IP6_FH_F_FRAG)
+ return NF_DROP;
+
+ IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &iph->saddr, &iph->daddr);
+
+ offset = iph->len + sizeof(_icmph);
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
+ return NF_ACCEPT;
+
+ pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
+ if (!pd)
+ return NF_ACCEPT;
+ pp = pd->pp;
+
+ /* Cannot handle fragmented embedded protocol */
+ if (ciph.fragoffs)
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
+ "Checking incoming ICMPv6 for");
+
+ /* The embedded headers contain source and dest in reverse order
+ * if not from localhost
+ */
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, AF_INET6, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
+ return v;
+
+ new_cp = true;
+ }
+
+ /* VS/TUN, VS/DR and LOCALNODE just let it go */
+ if ((hooknum == NF_INET_LOCAL_OUT) &&
+ (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
+ verdict = NF_ACCEPT;
+ goto out;
+ }
+
+ /* do the statistics and put it back */
+ ip_vs_in_stats(cp, skb);
+
+ /* Need to mangle contained IPv6 header in ICMPv6 packet */
+ offset = ciph.len;
+ if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
+ IPPROTO_SCTP == ciph.protocol)
+ offset += 2 * sizeof(__u16); /* Also mangle ports */
+
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
+
+out:
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
+
+ return verdict;
+}
+#endif
+
+
+/*
+ * Check if it's for virtual services, look it up,
+ * and send it on its way...
+ */
+static unsigned int
+ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
+{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
+ struct ip_vs_iphdr iph;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
+ struct ip_vs_conn *cp;
+ int ret, pkts;
+ struct sock *sk;
+ int af = state->pf;
+
+ /* Already marked as IPVS request or reply? */
+ if (skb->ipvs_property)
+ return NF_ACCEPT;
+
+ /*
+ * Big tappo:
+ * - remote client: only PACKET_HOST
+ * - route: used for struct net when skb->dev is unset
+ */
+ if (unlikely((skb->pkt_type != PACKET_HOST &&
+ hooknum != NF_INET_LOCAL_OUT) ||
+ !skb_dst(skb))) {
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
+ IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
+ " ignored in hook %u\n",
+ skb->pkt_type, iph.protocol,
+ IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
+ return NF_ACCEPT;
+ }
+ /* ipvs enabled in this netns ? */
+ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ return NF_ACCEPT;
+
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
+
+ /* Bad... Do not break raw sockets */
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
+ af == AF_INET)) {
+
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ return NF_ACCEPT;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+ int related;
+ int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
+ hooknum, &iph);
+
+ if (related)
+ return verdict;
+ }
+ } else
+#endif
+ if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+ int related;
+ int verdict = ip_vs_in_icmp(ipvs, skb, &related,
+ hooknum);
+
+ if (related)
+ return verdict;
+ }
+
+ /* Protocol supported? */
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+ if (unlikely(!pd)) {
+ /* The only way we'll see this packet again is if it's
+ * encapsulated, so mark it with ipvs_property=1 so we
+ * skip it if we're ignoring tunneled packets
+ */
+ if (sysctl_ignore_tunneled(ipvs))
+ skb->ipvs_property = 1;
+
+ return NF_ACCEPT;
+ }
+ pp = pd->pp;
+ /*
+ * Check if the packet belongs to an existing connection entry
+ */
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, af, skb, &iph);
+
+ if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ bool old_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
+ } else if (conn_reuse_mode &&
+ is_new_conn_expected(cp, conn_reuse_mode)) {
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+ } else {
+ /* Do not reschedule controlling connection
+ * that uses conntrack while it is still
+ * referenced by controlled connection(s).
+ */
+ resched = !old_ct;
+ }
+ }
+
+ if (resched) {
+ if (!old_ct)
+ cp->flags &= ~IP_VS_CONN_F_NFCT;
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (old_ct)
+ return NF_DROP;
+ cp = NULL;
+ }
+ }
+
+ /* Check the server status */
+ if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ /* the destination server is not available */
+ if (sysctl_expire_nodest_conn(ipvs)) {
+ bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
+
+ if (!old_ct)
+ cp->flags &= ~IP_VS_CONN_F_NFCT;
+
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (old_ct)
+ return NF_DROP;
+ cp = NULL;
+ } else {
+ __ip_vs_conn_put(cp);
+ return NF_DROP;
+ }
+ }
+
+ if (unlikely(!cp)) {
+ int v;
+
+ if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
+ return v;
+ }
+
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
+
+ ip_vs_in_stats(cp, skb);
+ ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
+ if (cp->packet_xmit)
+ ret = cp->packet_xmit(skb, cp, pp, &iph);
+ /* do not touch skb anymore */
+ else {
+ IP_VS_DBG_RL("warning: packet_xmit is null");
+ ret = NF_ACCEPT;
+ }
+
+	/* Increase its packet counter and check if it needs
+	 * to be synchronized
+	 *
+	 * Sync the connection if it is about to close, to
+	 * encourage the standby servers to update the connection timeout
+ *
+ * For ONE_PKT let ip_vs_sync_conn() do the filter work.
+ */
+
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ pkts = sysctl_sync_threshold(ipvs);
+ else
+ pkts = atomic_inc_return(&cp->in_pkts);
+
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ ip_vs_sync_conn(ipvs, cp, pkts);
+ else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ /* increment is done inside ip_vs_sync_conn too */
+ atomic_inc(&cp->control->in_pkts);
+
+ ip_vs_conn_put(cp);
+ return ret;
+}
+
+/*
+ * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
+ * related packets destined for 0.0.0.0/0.
+ *      When a fwmark-based virtual service is used, such as a transparent
+ *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ *      but ICMP destined for 0.0.0.0/0 cannot be easily marked and
+ * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
+ * and send them to ip_vs_in_icmp.
+ */
+static unsigned int
+ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ int r;
+
+ /* ipvs enabled in this netns ? */
+ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+ return NF_ACCEPT;
+
+ if (state->pf == NFPROTO_IPV4) {
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+ return NF_ACCEPT;
+#ifdef CONFIG_IP_VS_IPV6
+ } else {
+ struct ip_vs_iphdr iphdr;
+
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
+
+ if (iphdr.protocol != IPPROTO_ICMPV6)
+ return NF_ACCEPT;
+
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
+#endif
+ }
+
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
+}
+
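+/* Hook ordering note: at LOCAL_IN and LOCAL_OUT the out hook runs just
+ * before the in hook (priority -2 vs -1, +1 vs +2 relative to the NAT
+ * hooks), so replies are matched and NATed before new requests are
+ * scheduled.
+ */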
+static const struct nf_hook_ops ip_vs_ops4[] = {
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC - 2,
+ },
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC - 1,
+ },
+ /* Before ip_vs_in, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST + 1,
+ },
+ /* After mangle, schedule and forward local requests */
+ {
+ .hook = ip_vs_in_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST + 2,
+ },
+ /* After packet filtering (but before ip_vs_out_icmp), catch icmp
+ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+ {
+ .hook = ip_vs_forward_icmp,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+};
+
+#ifdef CONFIG_IP_VS_IPV6
+static const struct nf_hook_ops ip_vs_ops6[] = {
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC - 2,
+ },
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC - 1,
+ },
+ /* Before ip_vs_in, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST + 1,
+ },
+ /* After mangle, schedule and forward local requests */
+ {
+ .hook = ip_vs_in_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST + 2,
+ },
+ /* After packet filtering (but before ip_vs_out_icmp), catch icmp
+ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+ {
+ .hook = ip_vs_forward_icmp,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+};
+#endif
+
+int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af)
+{
+ const struct nf_hook_ops *ops;
+ unsigned int count;
+ unsigned int afmask;
+ int ret = 0;
+
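+	/* hooks_afmask tracks hook registration per family:
+	 * bit 0 (1) = IPv4, bit 1 (2) = IPv6.
+	 */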
+ if (af == AF_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ ops = ip_vs_ops6;
+ count = ARRAY_SIZE(ip_vs_ops6);
+ afmask = 2;
+#else
+ return -EINVAL;
+#endif
+ } else {
+ ops = ip_vs_ops4;
+ count = ARRAY_SIZE(ip_vs_ops4);
+ afmask = 1;
+ }
+
+ if (!(ipvs->hooks_afmask & afmask)) {
+ ret = nf_register_net_hooks(ipvs->net, ops, count);
+ if (ret >= 0)
+ ipvs->hooks_afmask |= afmask;
+ }
+ return ret;
+}
+
+void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af)
+{
+ const struct nf_hook_ops *ops;
+ unsigned int count;
+ unsigned int afmask;
+
+ if (af == AF_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ ops = ip_vs_ops6;
+ count = ARRAY_SIZE(ip_vs_ops6);
+ afmask = 2;
+#else
+ return;
+#endif
+ } else {
+ ops = ip_vs_ops4;
+ count = ARRAY_SIZE(ip_vs_ops4);
+ afmask = 1;
+ }
+
+ if (ipvs->hooks_afmask & afmask) {
+ nf_unregister_net_hooks(ipvs->net, ops, count);
+ ipvs->hooks_afmask &= ~afmask;
+ }
+}
+
+/*
+ * Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+ struct netns_ipvs *ipvs;
+
+ ipvs = net_generic(net, ip_vs_net_id);
+ if (ipvs == NULL)
+ return -ENOMEM;
+
+ /* Hold the beast until a service is registered */
+ ipvs->enable = 0;
+ ipvs->net = net;
+ /* Counters used for creating unique names */
+ ipvs->gen = atomic_read(&ipvs_netns_cnt);
+ atomic_inc(&ipvs_netns_cnt);
+ net->ipvs = ipvs;
+
+ if (ip_vs_estimator_net_init(ipvs) < 0)
+ goto estimator_fail;
+
+ if (ip_vs_control_net_init(ipvs) < 0)
+ goto control_fail;
+
+ if (ip_vs_protocol_net_init(ipvs) < 0)
+ goto protocol_fail;
+
+ if (ip_vs_app_net_init(ipvs) < 0)
+ goto app_fail;
+
+ if (ip_vs_conn_net_init(ipvs) < 0)
+ goto conn_fail;
+
+ if (ip_vs_sync_net_init(ipvs) < 0)
+ goto sync_fail;
+
+ return 0;
+/*
+ * Error handling
+ */
+
+sync_fail:
+ ip_vs_conn_net_cleanup(ipvs);
+conn_fail:
+ ip_vs_app_net_cleanup(ipvs);
+app_fail:
+ ip_vs_protocol_net_cleanup(ipvs);
+protocol_fail:
+ ip_vs_control_net_cleanup(ipvs);
+control_fail:
+ ip_vs_estimator_net_cleanup(ipvs);
+estimator_fail:
+ net->ipvs = NULL;
+ return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
+{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+ net->ipvs = NULL;
+ }
+}
+
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
+{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ EnterFunction(2);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_unregister_hooks(ipvs, AF_INET);
+ ip_vs_unregister_hooks(ipvs, AF_INET6);
+ ipvs->enable = 0; /* Disable packet reception */
+ smp_wmb();
+ ip_vs_sync_net_cleanup(ipvs);
+ }
+ LeaveFunction(2);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+ .init = __ip_vs_init,
+ .exit_batch = __ip_vs_cleanup_batch,
+ .id = &ip_vs_net_id,
+ .size = sizeof(struct netns_ipvs),
+};
+
+static struct pernet_operations ipvs_core_dev_ops = {
+ .exit_batch = __ip_vs_dev_cleanup_batch,
+};
+
+/*
+ * Initialize IP Virtual Server
+ */
+static int __init ip_vs_init(void)
+{
+ int ret;
+
+ ret = ip_vs_control_init();
+ if (ret < 0) {
+ pr_err("can't setup control.\n");
+ goto exit;
+ }
+
+ ip_vs_protocol_init();
+
+ ret = ip_vs_conn_init();
+ if (ret < 0) {
+ pr_err("can't setup connection table.\n");
+ goto cleanup_protocol;
+ }
+
+ ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
+ if (ret < 0)
+ goto cleanup_conn;
+
+ ret = register_pernet_device(&ipvs_core_dev_ops);
+ if (ret < 0)
+ goto cleanup_sub;
+
+ ret = ip_vs_register_nl_ioctl();
+ if (ret < 0) {
+ pr_err("can't register netlink/ioctl.\n");
+ goto cleanup_dev;
+ }
+
+ pr_info("ipvs loaded.\n");
+
+ return ret;
+
+cleanup_dev:
+ unregister_pernet_device(&ipvs_core_dev_ops);
+cleanup_sub:
+ unregister_pernet_subsys(&ipvs_core_ops);
+cleanup_conn:
+ ip_vs_conn_cleanup();
+cleanup_protocol:
+ ip_vs_protocol_cleanup();
+ ip_vs_control_cleanup();
+exit:
+ return ret;
+}
+
+static void __exit ip_vs_cleanup(void)
+{
+ ip_vs_unregister_nl_ioctl();
+ unregister_pernet_device(&ipvs_core_dev_ops);
+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
+ ip_vs_conn_cleanup();
+ ip_vs_protocol_cleanup();
+ ip_vs_control_cleanup();
+ pr_info("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
new file mode 100644
index 000000000..17a1b731a
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -0,0 +1,4288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the NetFilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * Changes:
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/mutex.h>
+
+#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
+#include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+#include <linux/uaccess.h>
+
+#include <net/ip_vs.h>
+
+MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME);
+
+/* mutex for IPVS sockopts. And, [gs]etsockopt may sleep. */
+static DEFINE_MUTEX(__ip_vs_mutex);
+
+/* sysctl variables */
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+
+int ip_vs_get_debug_level(void)
+{
+ return sysctl_ip_vs_debug_level;
+}
+#endif
+
+
+/* Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
+
+
+#ifdef CONFIG_IP_VS_IPV6
+/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
+static bool __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
+{
+ struct flowi6 fl6 = {
+ .daddr = *addr,
+ };
+ struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
+ bool is_local;
+
+ is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
+
+ dst_release(dst);
+ return is_local;
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+/*
+ * update_defense_level is called from keventd and from sysctl,
+ * so it needs to protect itself from softirqs
+ */
+static void update_defense_level(struct netns_ipvs *ipvs)
+{
+ struct sysinfo i;
+ int availmem;
+ int nomem;
+ int to_change = -1;
+
+ /* we only count free and buffered memory (in pages) */
+ si_meminfo(&i);
+ availmem = i.freeram + i.bufferram;
+	/* however, in Linux 2.5 i.bufferram is the total page cache size,
+	   so we would need to adjust it */
+ /* si_swapinfo(&i); */
+ /* availmem = availmem - (i.totalswap - i.freeswap); */
+
+ nomem = (availmem < ipvs->sysctl_amemthresh);
+
+ local_bh_disable();
+
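+	/* Each defense sysctl below is a small state machine: 0 = never
+	 * drop, 1 = drop while low on memory (latches to 2), 2 = latched
+	 * until memory recovers (falls back to 1), 3 = always drop.
+	 */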
+ /* drop_entry */
+ spin_lock(&ipvs->dropentry_lock);
+ switch (ipvs->sysctl_drop_entry) {
+ case 0:
+ atomic_set(&ipvs->dropentry, 0);
+ break;
+ case 1:
+ if (nomem) {
+ atomic_set(&ipvs->dropentry, 1);
+ ipvs->sysctl_drop_entry = 2;
+ } else {
+ atomic_set(&ipvs->dropentry, 0);
+ }
+ break;
+ case 2:
+ if (nomem) {
+ atomic_set(&ipvs->dropentry, 1);
+ } else {
+ atomic_set(&ipvs->dropentry, 0);
+ ipvs->sysctl_drop_entry = 1;
+ }
+ break;
+ case 3:
+ atomic_set(&ipvs->dropentry, 1);
+ break;
+ }
+ spin_unlock(&ipvs->dropentry_lock);
+
+ /* drop_packet */
+ spin_lock(&ipvs->droppacket_lock);
+ switch (ipvs->sysctl_drop_packet) {
+ case 0:
+ ipvs->drop_rate = 0;
+ break;
+ case 1:
+ if (nomem) {
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ ipvs->sysctl_drop_packet = 2;
+ } else {
+ ipvs->drop_rate = 0;
+ }
+ break;
+ case 2:
+ if (nomem) {
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ } else {
+ ipvs->drop_rate = 0;
+ ipvs->sysctl_drop_packet = 1;
+ }
+ break;
+ case 3:
+ ipvs->drop_rate = ipvs->sysctl_am_droprate;
+ break;
+ }
+ spin_unlock(&ipvs->droppacket_lock);
+
+ /* secure_tcp */
+ spin_lock(&ipvs->securetcp_lock);
+ switch (ipvs->sysctl_secure_tcp) {
+ case 0:
+ if (ipvs->old_secure_tcp >= 2)
+ to_change = 0;
+ break;
+ case 1:
+ if (nomem) {
+ if (ipvs->old_secure_tcp < 2)
+ to_change = 1;
+ ipvs->sysctl_secure_tcp = 2;
+ } else {
+ if (ipvs->old_secure_tcp >= 2)
+ to_change = 0;
+ }
+ break;
+ case 2:
+ if (nomem) {
+ if (ipvs->old_secure_tcp < 2)
+ to_change = 1;
+ } else {
+ if (ipvs->old_secure_tcp >= 2)
+ to_change = 0;
+ ipvs->sysctl_secure_tcp = 1;
+ }
+ break;
+ case 3:
+ if (ipvs->old_secure_tcp < 2)
+ to_change = 1;
+ break;
+ }
+ ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
+ if (to_change >= 0)
+ ip_vs_protocol_timeout_change(ipvs,
+ ipvs->sysctl_secure_tcp > 1);
+ spin_unlock(&ipvs->securetcp_lock);
+
+ local_bh_enable();
+}
+
+/* Handler for the delayed work that expires
+ * no-destination connections
+ */
+static void expire_nodest_conn_handler(struct work_struct *work)
+{
+ struct netns_ipvs *ipvs;
+
+ ipvs = container_of(work, struct netns_ipvs,
+ expire_nodest_conn_work.work);
+ ip_vs_expire_nodest_conn_flush(ipvs);
+}
+
+/*
+ * Timer for checking the defense
+ */
+#define DEFENSE_TIMER_PERIOD	(1 * HZ)
+
+static void defense_work_handler(struct work_struct *work)
+{
+ struct netns_ipvs *ipvs =
+ container_of(work, struct netns_ipvs, defense_work.work);
+
+ update_defense_level(ipvs);
+ if (atomic_read(&ipvs->dropentry))
+ ip_vs_random_dropentry(ipvs);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
+}
+#endif
+
+int
+ip_vs_use_count_inc(void)
+{
+ return try_module_get(THIS_MODULE);
+}
+
+void
+ip_vs_use_count_dec(void)
+{
+ module_put(THIS_MODULE);
+}
+
+
+/*
+ * Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+
+/*
+ * Returns hash value for virtual service
+ */
+static inline unsigned int
+ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
+ const union nf_inet_addr *addr, __be16 port)
+{
+ unsigned int porth = ntohs(port);
+ __be32 addr_fold = addr->ip;
+ __u32 ahash;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+ ahash = ntohl(addr_fold);
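+	/* Mix in the netns pointer so identical services in different
+	 * namespaces land in different buckets.
+	 */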
+ ahash ^= ((size_t) ipvs >> 8);
+
+ return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
+ IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ * Returns hash value of fwmark for virtual service lookup
+ */
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
+{
+ return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
+ * or in the ip_vs_svc_fwm_table by fwmark.
+ * Should be called with locked tables.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+ unsigned int hash;
+
+ if (svc->flags & IP_VS_SVC_F_HASHED) {
+ pr_err("%s(): request for already hashed, called from %pS\n",
+ __func__, __builtin_return_address(0));
+ return 0;
+ }
+
+ if (svc->fwmark == 0) {
+ /*
+ * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
+ */
+ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
+ &svc->addr, svc->port);
+ hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
+ } else {
+ /*
+ * Hash it by fwmark in svc_fwm_table
+ */
+ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
+ hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+ }
+
+ svc->flags |= IP_VS_SVC_F_HASHED;
+ /* increase its refcnt because it is referenced by the svc table */
+ atomic_inc(&svc->refcnt);
+ return 1;
+}
+
+
+/*
+ * Unhashes a service from svc_table / svc_fwm_table.
+ * Should be called with locked tables.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+ if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
+ pr_err("%s(): request for unhash flagged, called from %pS\n",
+ __func__, __builtin_return_address(0));
+ return 0;
+ }
+
+ if (svc->fwmark == 0) {
+ /* Remove it from the svc_table table */
+ hlist_del_rcu(&svc->s_list);
+ } else {
+ /* Remove it from the svc_fwm_table table */
+ hlist_del_rcu(&svc->f_list);
+ }
+
+ svc->flags &= ~IP_VS_SVC_F_HASHED;
+ atomic_dec(&svc->refcnt);
+ return 1;
+}
+
+
+/*
+ * Get service by {netns, proto,addr,port} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
+{
+ unsigned int hash;
+ struct ip_vs_service *svc;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
+
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
+ if ((svc->af == af)
+ && ip_vs_addr_equal(af, &svc->addr, vaddr)
+ && (svc->port == vport)
+ && (svc->protocol == protocol)
+ && (svc->ipvs == ipvs)) {
+ /* HIT */
+ return svc;
+ }
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Get service by {fwmark} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
+{
+ unsigned int hash;
+ struct ip_vs_service *svc;
+
+ /* Check for fwmark addressed entries */
+ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
+
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+ if (svc->fwmark == fwmark && svc->af == af
+ && (svc->ipvs == ipvs)) {
+ /* HIT */
+ return svc;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find service, called under RCU lock */
+struct ip_vs_service *
+ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
+{
+ struct ip_vs_service *svc;
+
+ /*
+ * Check the table hashed by fwmark first
+ */
+ if (fwmark) {
+ svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
+ if (svc)
+ goto out;
+ }
+
+ /*
+ * Check the table hashed by <protocol,addr,port>
+ * for "full" addressed entries
+ */
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
+
+ if (!svc && protocol == IPPROTO_TCP &&
+ atomic_read(&ipvs->ftpsvc_counter) &&
+ (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) {
+ /*
+ * Check if ftp service entry exists, the packet
+ * might belong to FTP data connections.
+ */
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
+ }
+
+ if (svc == NULL
+ && atomic_read(&ipvs->nullsvc_counter)) {
+ /*
+ * Check if the catch-all port (port zero) exists
+ */
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
+ }
+
+ out:
+ IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+ fwmark, ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+ svc ? "hit" : "not hit");
+
+ return svc;
+}
+
+
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+ atomic_inc(&svc->refcnt);
+ rcu_assign_pointer(dest->svc, svc);
+}
+
+static void ip_vs_service_free(struct ip_vs_service *svc)
+{
+ free_percpu(svc->stats.cpustats);
+ kfree(svc);
+}
+
+static void ip_vs_service_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
+
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
+ if (atomic_dec_and_test(&svc->refcnt)) {
+ IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
+ svc->fwmark,
+ IP_VS_DBG_ADDR(svc->af, &svc->addr),
+ ntohs(svc->port));
+ if (do_delay)
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+ else
+ ip_vs_service_free(svc);
+ }
+}
+
+
+/*
+ * Returns hash value for real service
+ */
+static inline unsigned int ip_vs_rs_hashkey(int af,
+ const union nf_inet_addr *addr,
+ __be16 port)
+{
+ unsigned int porth = ntohs(port);
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+
+ return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
+ & IP_VS_RTAB_MASK;
+}
+
+/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
+static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
+{
+ unsigned int hash;
+ __be16 port;
+
+ if (dest->in_rs_table)
+ return;
+
+ switch (IP_VS_DFWD_METHOD(dest)) {
+ case IP_VS_CONN_F_MASQ:
+ port = dest->port;
+ break;
+ case IP_VS_CONN_F_TUNNEL:
+ switch (dest->tun_type) {
+ case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+ port = dest->tun_port;
+ break;
+ case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+ case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
+ port = 0;
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ /*
+ * Hash by proto,addr,port,
+ * which are the parameters of the real service.
+ */
+ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
+
+ hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
+ dest->in_rs_table = 1;
+}
+
+/* Unhash ip_vs_dest from rs_table. */
+static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+ /*
+ * Remove it from the rs_table table.
+ */
+ if (dest->in_rs_table) {
+ hlist_del_rcu(&dest->d_list);
+ dest->in_rs_table = 0;
+ }
+}
+
+/* Check if real service by <proto,addr,port> is present */
+bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport)
+{
+ unsigned int hash;
+ struct ip_vs_dest *dest;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+ /* HIT */
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Find real service record by <proto,addr,port>.
+ * In case of multiple records with the same <proto,addr,port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
+ __u16 protocol,
+ const union nf_inet_addr *daddr,
+ __be16 dport)
+{
+ unsigned int hash;
+ struct ip_vs_dest *dest;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+ const union nf_inet_addr *daddr,
+ __be16 tun_port)
+{
+ struct ip_vs_dest *dest;
+ unsigned int hash;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->tun_port == tun_port &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/* Lookup destination by {addr,port} in the given service
+ * Called under RCU lock.
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
+{
+ struct ip_vs_dest *dest;
+
+ /*
+ * Find the destination for the given service
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if ((dest->af == dest_af) &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ (dest->port == dport)) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Created to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ * Called under RCU lock, no refcnt is returned.
+ */
+struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
+ const union nf_inet_addr *daddr,
+ __be16 dport,
+ const union nf_inet_addr *vaddr,
+ __be16 vport, __u16 protocol, __u32 fwmark,
+ __u32 flags)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_service *svc;
+ __be16 port = dport;
+
+ svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
+ if (!svc)
+ return NULL;
+ if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
+ port = 0;
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
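+	/* Retry with the alternate port: port ^ dport yields 0 when we
+	 * first tried dport, and dport when we first tried 0.
+	 */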
+ if (!dest)
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
+ return dest;
+}
+
+void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_dst *dest_dst = container_of(head,
+ struct ip_vs_dest_dst,
+ rcu_head);
+
+ dst_release(dest_dst->dst_cache);
+ kfree(dest_dst);
+}
+
+/* Release dest_dst and dst_cache for dest in user context */
+static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
+{
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst, 1);
+ if (old) {
+ RCU_INIT_POINTER(dest->dest_dst, NULL);
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
+ }
+}
+
+/*
+ * Lookup dest by {svc,addr,port} in the destination trash.
+ * The destination trash is used to hold the destinations that are removed
+ * from the service table but are still referenced by some conn entries.
+ * The reason for the destination trash is that when a dest is temporarily
+ * down (either by administrator or by monitor program), the dest can be
+ * picked back from the trash, the remaining connections to the dest can
+ * continue, and the counting information of the dest is also useful for
+ * scheduling.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
+{
+ struct ip_vs_dest *dest;
+ struct netns_ipvs *ipvs = svc->ipvs;
+
+ /*
+ * Find the destination in trash
+ */
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
+ IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+ "dest->refcnt=%d\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port),
+ refcount_read(&dest->refcnt));
+ if (dest->af == dest_af &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ dest->port == dport &&
+ dest->vfwmark == svc->fwmark &&
+ dest->protocol == svc->protocol &&
+ (svc->fwmark ||
+ (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
+ dest->vport == svc->port))) {
+ /* HIT */
+ list_del(&dest->t_list);
+ goto out;
+ }
+ }
+
+ dest = NULL;
+
+out:
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+
+ return dest;
+}
+
+static void ip_vs_dest_free(struct ip_vs_dest *dest)
+{
+ struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
+ __ip_vs_dst_cache_reset(dest);
+ __ip_vs_svc_put(svc, false);
+ free_percpu(dest->stats.cpustats);
+ ip_vs_dest_put_and_free(dest);
+}
+
+/*
+ * Clean up all the destinations in the trash
+ * Called by the ip_vs_control_cleanup()
+ *
+ * When ip_vs_control_cleanup() runs on ipvs module exit, the service
+ * tables have already been flushed and all connections have expired,
+ * so the refcnt of each destination in the trash must be 1 and we can
+ * simply release them here.
+ */
+static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
+{
+ struct ip_vs_dest *dest, *nxt;
+
+ del_timer_sync(&ipvs->dest_trash_timer);
+ /* No need to use dest_trash_lock */
+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
+ }
+}
+
+static void
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
+{
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
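+/* For example, IP_VS_SHOW_STATS_COUNTER(conns) expands to
+ *
+ *	dst->conns = src->kstats.conns - src->kstats0.conns;
+ *
+ * i.e. each counter is exported relative to the zero point recorded in
+ * kstats0 by ip_vs_zero_stats().
+ */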
+
+ spin_lock_bh(&src->lock);
+
+ IP_VS_SHOW_STATS_COUNTER(conns);
+ IP_VS_SHOW_STATS_COUNTER(inpkts);
+ IP_VS_SHOW_STATS_COUNTER(outpkts);
+ IP_VS_SHOW_STATS_COUNTER(inbytes);
+ IP_VS_SHOW_STATS_COUNTER(outbytes);
+
+ ip_vs_read_estimator(dst, src);
+
+ spin_unlock_bh(&src->lock);
+}
+
+static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+ dst->conns = (u32)src->conns;
+ dst->inpkts = (u32)src->inpkts;
+ dst->outpkts = (u32)src->outpkts;
+ dst->inbytes = src->inbytes;
+ dst->outbytes = src->outbytes;
+ dst->cps = (u32)src->cps;
+ dst->inpps = (u32)src->inpps;
+ dst->outpps = (u32)src->outpps;
+ dst->inbps = (u32)src->inbps;
+ dst->outbps = (u32)src->outbps;
+}
+
+static void
+ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+ spin_lock_bh(&stats->lock);
+
+ /* get current counters as zero point, rates are zeroed */
+
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
+
+ IP_VS_ZERO_STATS_COUNTER(conns);
+ IP_VS_ZERO_STATS_COUNTER(inpkts);
+ IP_VS_ZERO_STATS_COUNTER(outpkts);
+ IP_VS_ZERO_STATS_COUNTER(inbytes);
+ IP_VS_ZERO_STATS_COUNTER(outbytes);
+
+ ip_vs_zero_estimator(stats);
+
+ spin_unlock_bh(&stats->lock);
+}
+
+/*
+ * Update a destination in the given service
+ */
+static void
+__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
+ struct ip_vs_dest_user_kern *udest, int add)
+{
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct ip_vs_service *old_svc;
+ struct ip_vs_scheduler *sched;
+ int conn_flags;
+
+ /* We cannot modify an address and change the address family */
+ BUG_ON(!add && udest->af != dest->af);
+
+ if (add && udest->af != svc->af)
+ ipvs->mixed_address_family_dests++;
+
+ /* keep the last_weight with latest non-0 weight */
+ if (add || udest->weight != 0)
+ atomic_set(&dest->last_weight, udest->weight);
+
+ /* set the weight and the flags */
+ atomic_set(&dest->weight, udest->weight);
+ conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
+ conn_flags |= IP_VS_CONN_F_INACTIVE;
+
+ /* Need to rehash? */
+ if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+ IP_VS_DFWD_METHOD(dest) ||
+ udest->tun_type != dest->tun_type ||
+ udest->tun_port != dest->tun_port)
+ ip_vs_rs_unhash(dest);
+
+ /* set the tunnel info */
+ dest->tun_type = udest->tun_type;
+ dest->tun_port = udest->tun_port;
+ dest->tun_flags = udest->tun_flags;
+
+ /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
+ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
+ conn_flags |= IP_VS_CONN_F_NOOUTPUT;
+ } else {
+ /* FTP-NAT requires conntrack for mangling */
+ if (svc->port == FTPPORT)
+ ip_vs_register_conntrack(svc);
+ }
+ atomic_set(&dest->conn_flags, conn_flags);
+ /* Put the real service in rs_table if not present. */
+ ip_vs_rs_hash(ipvs, dest);
+
+ /* bind the service */
+ old_svc = rcu_dereference_protected(dest->svc, 1);
+ if (!old_svc) {
+ __ip_vs_bind_svc(dest, svc);
+ } else {
+ if (old_svc != svc) {
+ ip_vs_zero_stats(&dest->stats);
+ __ip_vs_bind_svc(dest, svc);
+ __ip_vs_svc_put(old_svc, true);
+ }
+ }
+
+ /* set the dest status flags */
+ dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+ if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ dest->u_threshold = udest->u_threshold;
+ dest->l_threshold = udest->l_threshold;
+
+ dest->af = udest->af;
+
+ spin_lock_bh(&dest->dst_lock);
+ __ip_vs_dst_cache_reset(dest);
+ spin_unlock_bh(&dest->dst_lock);
+
+ if (add) {
+ ip_vs_start_estimator(svc->ipvs, &dest->stats);
+ list_add_rcu(&dest->n_list, &svc->destinations);
+ svc->num_dests++;
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->add_dest)
+ sched->add_dest(svc, dest);
+ } else {
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->upd_dest)
+ sched->upd_dest(svc, dest);
+ }
+}
+
+
+/*
+ * Create a destination for the given service
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+ struct ip_vs_dest *dest;
+ unsigned int atype, i;
+
+ EnterFunction(2);
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (udest->af == AF_INET6) {
+ int ret;
+
+ atype = ipv6_addr_type(&udest->addr.in6);
+ if ((!(atype & IPV6_ADDR_UNICAST) ||
+ atype & IPV6_ADDR_LINKLOCAL) &&
+ !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
+ return -EINVAL;
+
+ ret = nf_defrag_ipv6_enable(svc->ipvs->net);
+ if (ret)
+ return ret;
+ } else
+#endif
+ {
+ atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
+ if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+ return -EINVAL;
+ }
+
+ dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
+ if (dest == NULL)
+ return -ENOMEM;
+
+ dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!dest->stats.cpustats)
+ goto err_alloc;
+
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *ip_vs_dest_stats;
+ ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
+ u64_stats_init(&ip_vs_dest_stats->syncp);
+ }
+
+ dest->af = udest->af;
+ dest->protocol = svc->protocol;
+ dest->vaddr = svc->addr;
+ dest->vport = svc->port;
+ dest->vfwmark = svc->fwmark;
+ ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
+ dest->port = udest->port;
+
+ atomic_set(&dest->activeconns, 0);
+ atomic_set(&dest->inactconns, 0);
+ atomic_set(&dest->persistconns, 0);
+ refcount_set(&dest->refcnt, 1);
+
+ INIT_HLIST_NODE(&dest->d_list);
+ spin_lock_init(&dest->dst_lock);
+ spin_lock_init(&dest->stats.lock);
+ __ip_vs_update_dest(svc, dest, udest, 1);
+
+ LeaveFunction(2);
+ return 0;
+
+err_alloc:
+ kfree(dest);
+ return -ENOMEM;
+}
+
+
+/*
+ * Add a destination into an existing service
+ */
+static int
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+ struct ip_vs_dest *dest;
+ union nf_inet_addr daddr;
+ __be16 dport = udest->port;
+ int ret;
+
+ EnterFunction(2);
+
+ if (udest->weight < 0) {
+ pr_err("%s(): server weight less than zero\n", __func__);
+ return -ERANGE;
+ }
+
+ if (udest->l_threshold > udest->u_threshold) {
+ pr_err("%s(): lower threshold is higher than upper threshold\n",
+ __func__);
+ return -ERANGE;
+ }
+
+ if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if (udest->tun_port == 0) {
+ pr_err("%s(): tunnel port is zero\n", __func__);
+ return -EINVAL;
+ }
+ }
+
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
+
+	/* We use a function that requires the RCU lock */
+ rcu_read_lock();
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
+ rcu_read_unlock();
+
+ if (dest != NULL) {
+ IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
+ return -EEXIST;
+ }
+
+ /*
+ * Check if the dest already exists in the trash and
+ * is from the same service
+ */
+ dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
+
+ if (dest != NULL) {
+ IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+ "dest->refcnt=%d, service %u/%s:%u\n",
+ IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
+ refcount_read(&dest->refcnt),
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+ ntohs(dest->vport));
+
+ __ip_vs_update_dest(svc, dest, udest, 1);
+ ret = 0;
+ } else {
+ /*
+ * Allocate and initialize the dest structure
+ */
+ ret = ip_vs_new_dest(svc, udest);
+ }
+ LeaveFunction(2);
+
+ return ret;
+}
+
+
+/*
+ * Edit a destination in the given service
+ */
+static int
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+ struct ip_vs_dest *dest;
+ union nf_inet_addr daddr;
+ __be16 dport = udest->port;
+
+ EnterFunction(2);
+
+ if (udest->weight < 0) {
+ pr_err("%s(): server weight less than zero\n", __func__);
+ return -ERANGE;
+ }
+
+ if (udest->l_threshold > udest->u_threshold) {
+ pr_err("%s(): lower threshold is higher than upper threshold\n",
+ __func__);
+ return -ERANGE;
+ }
+
+ if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if (udest->tun_port == 0) {
+ pr_err("%s(): tunnel port is zero\n", __func__);
+ return -EINVAL;
+ }
+ }
+
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
+
+	/* We use a function that requires the RCU lock */
+ rcu_read_lock();
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
+ rcu_read_unlock();
+
+ if (dest == NULL) {
+ IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
+ return -ENOENT;
+ }
+
+ __ip_vs_update_dest(svc, dest, udest, 0);
+ LeaveFunction(2);
+
+ return 0;
+}
+
+/*
+ * Delete a destination (must be already unlinked from the service)
+ */
+static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
+ bool cleanup)
+{
+ ip_vs_stop_estimator(ipvs, &dest->stats);
+
+ /*
+	 *	Remove it from the hash table of real servers.
+ */
+ ip_vs_rs_unhash(dest);
+
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ refcount_read(&dest->refcnt));
+ if (list_empty(&ipvs->dest_trash) && !cleanup)
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
+ /* dest lives in trash with reference */
+ list_add(&dest->t_list, &ipvs->dest_trash);
+ dest->idle_start = 0;
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+
+	/* Queue up delayed work to expire all connections left without
+	 * a destination.  No-op when CONFIG_SYSCTL is disabled.
+	 */
+ if (!cleanup)
+ ip_vs_enqueue_expire_nodest_conns(ipvs);
+}
+
+
+/*
+ * Unlink a destination from the given service
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ int svcupd)
+{
+ dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+ /*
+	 *	Remove it from the service's destination list.
+ */
+ list_del_rcu(&dest->n_list);
+ svc->num_dests--;
+
+ if (dest->af != svc->af)
+ svc->ipvs->mixed_address_family_dests--;
+
+ if (svcupd) {
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->del_dest)
+ sched->del_dest(svc, dest);
+ }
+}
+
+
+/*
+ * Delete a destination server in the given service
+ */
+static int
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+ struct ip_vs_dest *dest;
+ __be16 dport = udest->port;
+
+ EnterFunction(2);
+
+	/* We use a function that requires the RCU lock */
+ rcu_read_lock();
+ dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
+ rcu_read_unlock();
+
+ if (dest == NULL) {
+ IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
+ return -ENOENT;
+ }
+
+ /*
+ * Unlink dest from the service
+ */
+ __ip_vs_unlink_dest(svc, dest, 1);
+
+ /*
+ * Delete the destination
+ */
+ __ip_vs_del_dest(svc->ipvs, dest, false);
+
+ LeaveFunction(2);
+
+ return 0;
+}
+
+static void ip_vs_dest_trash_expire(struct timer_list *t)
+{
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
+ struct ip_vs_dest *dest, *next;
+ unsigned long now = jiffies;
+
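+	/* Two-pass expiry: a dest whose refcnt has dropped to 1 is first
+	 * stamped with idle_start, and is freed only on a later pass once
+	 * it has stayed idle for IP_VS_DEST_TRASH_PERIOD since that stamp.
+	 */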
+ spin_lock(&ipvs->dest_trash_lock);
+ list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
+ if (refcount_read(&dest->refcnt) > 1)
+ continue;
+ if (dest->idle_start) {
+ if (time_before(now, dest->idle_start +
+ IP_VS_DEST_TRASH_PERIOD))
+ continue;
+ } else {
+ dest->idle_start = max(1UL, now);
+ continue;
+ }
+ IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port));
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
+ }
+ if (!list_empty(&ipvs->dest_trash))
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
+ spin_unlock(&ipvs->dest_trash_lock);
+}
+
+/*
+ * Add a service into the service hash table
+ */
+static int
+ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
+ struct ip_vs_service **svc_p)
+{
+ int ret = 0, i;
+ struct ip_vs_scheduler *sched = NULL;
+ struct ip_vs_pe *pe = NULL;
+ struct ip_vs_service *svc = NULL;
+ int ret_hooks = -1;
+
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc())
+ return -ENOPROTOOPT;
+
+ /* Lookup the scheduler by 'u->sched_name' */
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ ret = -ENOENT;
+ goto out_err;
+ }
+ }
+
+ if (u->pe_name && *u->pe_name) {
+ pe = ip_vs_pe_getbyname(u->pe_name);
+ if (pe == NULL) {
+ pr_info("persistence engine module ip_vs_pe_%s "
+ "not found\n", u->pe_name);
+ ret = -ENOENT;
+ goto out_err;
+ }
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ __u32 plen = (__force __u32) u->netmask;
+
+ if (plen < 1 || plen > 128) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ ret = nf_defrag_ipv6_enable(ipvs->net);
+ if (ret)
+ goto out_err;
+ }
+#endif
+
+ if ((u->af == AF_INET && !ipvs->num_services) ||
+ (u->af == AF_INET6 && !ipvs->num_services6)) {
+ ret = ip_vs_register_hooks(ipvs, u->af);
+ if (ret < 0)
+ goto out_err;
+ ret_hooks = ret;
+ }
+
+ svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
+ if (svc == NULL) {
+ IP_VS_DBG(1, "%s(): no memory\n", __func__);
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!svc->stats.cpustats) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *ip_vs_stats;
+ ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
+ u64_stats_init(&ip_vs_stats->syncp);
+ }
+
+ /* I'm the first user of the service */
+ atomic_set(&svc->refcnt, 0);
+
+ svc->af = u->af;
+ svc->protocol = u->protocol;
+ ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
+ svc->port = u->port;
+ svc->fwmark = u->fwmark;
+ svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
+ svc->timeout = u->timeout * HZ;
+ svc->netmask = u->netmask;
+ svc->ipvs = ipvs;
+
+ INIT_LIST_HEAD(&svc->destinations);
+ spin_lock_init(&svc->sched_lock);
+ spin_lock_init(&svc->stats.lock);
+
+ /* Bind the scheduler */
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret)
+ goto out_err;
+ sched = NULL;
+ }
+
+ /* Bind the ct retriever */
+ RCU_INIT_POINTER(svc->pe, pe);
+ pe = NULL;
+
+ /* Update the virtual service counters */
+ if (svc->port == FTPPORT)
+ atomic_inc(&ipvs->ftpsvc_counter);
+ else if (svc->port == 0)
+ atomic_inc(&ipvs->nullsvc_counter);
+ if (svc->pe && svc->pe->conn_out)
+ atomic_inc(&ipvs->conn_out_counter);
+
+ ip_vs_start_estimator(ipvs, &svc->stats);
+
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ipvs->num_services++;
+ else if (svc->af == AF_INET6)
+ ipvs->num_services6++;
+
+ /* Hash the service into the service table */
+ ip_vs_svc_hash(svc);
+
+ *svc_p = svc;
+ /* Now there is a service - full throttle */
+ ipvs->enable = 1;
+ return 0;
+
+ out_err:
+ if (ret_hooks >= 0)
+ ip_vs_unregister_hooks(ipvs, u->af);
+ if (svc != NULL) {
+ ip_vs_unbind_scheduler(svc, sched);
+ ip_vs_service_free(svc);
+ }
+ ip_vs_scheduler_put(sched);
+ ip_vs_pe_put(pe);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+
+ return ret;
+}
+
+
+/*
+ * Edit a service and bind it with a new scheduler
+ */
+static int
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
+{
+ struct ip_vs_scheduler *sched = NULL, *old_sched;
+ struct ip_vs_pe *pe = NULL, *old_pe = NULL;
+ int ret = 0;
+ bool new_pe_conn_out, old_pe_conn_out;
+
+ /*
+	 * Lookup the scheduler by 'u->sched_name'
+ */
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ return -ENOENT;
+ }
+ }
+ old_sched = sched;
+
+ if (u->pe_name && *u->pe_name) {
+ pe = ip_vs_pe_getbyname(u->pe_name);
+ if (pe == NULL) {
+ pr_info("persistence engine module ip_vs_pe_%s "
+ "not found\n", u->pe_name);
+ ret = -ENOENT;
+ goto out;
+ }
+ old_pe = pe;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ __u32 plen = (__force __u32) u->netmask;
+
+ if (plen < 1 || plen > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+#endif
+
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched != old_sched) {
+ if (old_sched) {
+ ip_vs_unbind_scheduler(svc, old_sched);
+ RCU_INIT_POINTER(svc->scheduler, NULL);
+ /* Wait all svc->sched_data users */
+ synchronize_rcu();
+ }
+ /* Bind the new scheduler */
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ ip_vs_scheduler_put(sched);
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * Set the flags and timeout value
+ */
+ svc->flags = u->flags | IP_VS_SVC_F_HASHED;
+ svc->timeout = u->timeout * HZ;
+ svc->netmask = u->netmask;
+
+ old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (pe != old_pe) {
+ rcu_assign_pointer(svc->pe, pe);
+ /* check for optional methods in new pe */
+ new_pe_conn_out = (pe && pe->conn_out) ? true : false;
+ old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
+ if (new_pe_conn_out && !old_pe_conn_out)
+ atomic_inc(&svc->ipvs->conn_out_counter);
+ if (old_pe_conn_out && !new_pe_conn_out)
+ atomic_dec(&svc->ipvs->conn_out_counter);
+ }
+
+out:
+ ip_vs_scheduler_put(old_sched);
+ ip_vs_pe_put(old_pe);
+ return ret;
+}
+
+/*
+ * Delete a service from the service list
+ * - The service must be unlinked, unlocked and not referenced!
+ * - We are called under _bh lock
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
+{
+ struct ip_vs_dest *dest, *nxt;
+ struct ip_vs_scheduler *old_sched;
+ struct ip_vs_pe *old_pe;
+ struct netns_ipvs *ipvs = svc->ipvs;
+
+ if (svc->af == AF_INET) {
+ ipvs->num_services--;
+ if (!ipvs->num_services)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ } else if (svc->af == AF_INET6) {
+ ipvs->num_services6--;
+ if (!ipvs->num_services6)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ }
+
+ ip_vs_stop_estimator(svc->ipvs, &svc->stats);
+
+ /* Unbind scheduler */
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ ip_vs_unbind_scheduler(svc, old_sched);
+ ip_vs_scheduler_put(old_sched);
+
+ /* Unbind persistence engine, keep svc->pe */
+ old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (old_pe && old_pe->conn_out)
+ atomic_dec(&ipvs->conn_out_counter);
+ ip_vs_pe_put(old_pe);
+
+ /*
+ * Unlink the whole destination list
+ */
+ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
+ __ip_vs_unlink_dest(svc, dest, 0);
+ __ip_vs_del_dest(svc->ipvs, dest, cleanup);
+ }
+
+ /*
+ * Update the virtual service counters
+ */
+ if (svc->port == FTPPORT)
+ atomic_dec(&ipvs->ftpsvc_counter);
+ else if (svc->port == 0)
+ atomic_dec(&ipvs->nullsvc_counter);
+
+ /*
+ * Free the service if nobody refers to it
+ */
+ __ip_vs_svc_put(svc, true);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+}
+
+/*
+ * Unlink a service from list and try to delete it if its refcnt reached 0
+ */
+static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
+{
+ ip_vs_unregister_conntrack(svc);
+ /* Hold svc to avoid double release from dest_trash */
+ atomic_inc(&svc->refcnt);
+ /*
+ * Unhash it from the service table
+ */
+ ip_vs_svc_unhash(svc);
+
+ __ip_vs_del_service(svc, cleanup);
+}
+
+/*
+ * Delete a service from the service list
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+ if (svc == NULL)
+ return -EEXIST;
+ ip_vs_unlink_service(svc, false);
+
+ return 0;
+}
+
+
+/*
+ * Flush all the virtual services
+ */
+static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
+{
+ int idx;
+ struct ip_vs_service *svc;
+ struct hlist_node *n;
+
+ /*
+ * Flush the service table hashed by <netns,protocol,addr,port>
+ */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
+ s_list) {
+ if (svc->ipvs == ipvs)
+ ip_vs_unlink_service(svc, cleanup);
+ }
+ }
+
+ /*
+ * Flush the service table hashed by fwmark
+ */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
+ f_list) {
+ if (svc->ipvs == ipvs)
+ ip_vs_unlink_service(svc, cleanup);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Delete service by {netns} in the service table.
+ * Called by __ip_vs_batch_cleanup()
+ */
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
+{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ EnterFunction(2);
+	/* Flush all services of each exiting netns */
+ mutex_lock(&__ip_vs_mutex);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_flush(ipvs, true);
+ }
+ mutex_unlock(&__ip_vs_mutex);
+ LeaveFunction(2);
+}
+
+/* Put all references for device (dst_cache) */
+static inline void
+ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
+{
+ struct ip_vs_dest_dst *dest_dst;
+
+ spin_lock_bh(&dest->dst_lock);
+ dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
+ if (dest_dst && dest_dst->dst_cache->dev == dev) {
+		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u, dest->refcnt=%d\n",
+ dev->name,
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port),
+ refcount_read(&dest->refcnt));
+ __ip_vs_dst_cache_reset(dest);
+ }
+ spin_unlock_bh(&dest->dst_lock);
+}
+
+/* Netdev event receiver
+ * Currently only NETDEV_DOWN is handled to release refs to cached dsts
+ */
+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_service *svc;
+ struct ip_vs_dest *dest;
+ unsigned int idx;
+
+ if (event != NETDEV_DOWN || !ipvs)
+ return NOTIFY_DONE;
+ IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+ EnterFunction(2);
+ mutex_lock(&__ip_vs_mutex);
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ if (svc->ipvs == ipvs) {
+ list_for_each_entry(dest, &svc->destinations,
+ n_list) {
+ ip_vs_forget_dev(dest, dev);
+ }
+ }
+ }
+
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ if (svc->ipvs == ipvs) {
+ list_for_each_entry(dest, &svc->destinations,
+ n_list) {
+ ip_vs_forget_dev(dest, dev);
+ }
+ }
+
+ }
+ }
+
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
+ ip_vs_forget_dev(dest, dev);
+ }
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+ mutex_unlock(&__ip_vs_mutex);
+ LeaveFunction(2);
+ return NOTIFY_DONE;
+}
+
+/*
+ * Zero counters in a service or all services
+ */
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest;
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ ip_vs_zero_stats(&dest->stats);
+ }
+ ip_vs_zero_stats(&svc->stats);
+ return 0;
+}
+
+static int ip_vs_zero_all(struct netns_ipvs *ipvs)
+{
+ int idx;
+ struct ip_vs_service *svc;
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ if (svc->ipvs == ipvs)
+ ip_vs_zero_service(svc);
+ }
+ }
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ if (svc->ipvs == ipvs)
+ ip_vs_zero_service(svc);
+ }
+ }
+
+ ip_vs_zero_stats(&ipvs->tot_stats);
+ return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int
+proc_do_defense_mode(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct netns_ipvs *ipvs = table->extra2;
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if (val < 0 || val > 3) {
+ rc = -EINVAL;
+ } else {
+ *valp = val;
+ update_defense_level(ipvs);
+ }
+ }
+ return rc;
+}
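+
+/* The handler above accepts defense modes 0..3 for the sysctls wired to
+ * it below (drop_entry, drop_packet, secure_tcp).  Illustrative use from
+ * userspace:
+ *
+ *	echo 1 > /proc/sys/net/ipv4/vs/drop_entry	# accepted
+ *	echo 4 > /proc/sys/net/ipv4/vs/drop_entry	# rejected, -EINVAL
+ */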
+
+static int
+proc_do_sync_threshold(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct netns_ipvs *ipvs = table->extra2;
+ int *valp = table->data;
+ int val[2];
+ int rc;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = table->maxlen,
+ .mode = table->mode,
+ };
+
+ mutex_lock(&ipvs->sync_mutex);
+ memcpy(val, valp, sizeof(val));
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write) {
+ if (val[0] < 0 || val[1] < 0 ||
+ (val[0] >= val[1] && val[1]))
+ rc = -EINVAL;
+ else
+ memcpy(valp, val, sizeof(val));
+ }
+ mutex_unlock(&ipvs->sync_mutex);
+ return rc;
+}
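+
+/* sync_threshold is a pair of ints; a write must keep the threshold
+ * strictly below a non-zero period (a period of 0 disables the check):
+ *
+ *	echo "3 50" > /proc/sys/net/ipv4/vs/sync_threshold	# accepted
+ *	echo "50 3" > /proc/sys/net/ipv4/vs/sync_threshold	# -EINVAL
+ */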
+
+static int
+proc_do_sync_ports(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if (val < 1 || !is_power_of_2(val))
+ rc = -EINVAL;
+ else
+ *valp = val;
+ }
+ return rc;
+}
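+
+/* sync_ports only accepts powers of two:
+ *
+ *	echo 8 > /proc/sys/net/ipv4/vs/sync_ports	# accepted
+ *	echo 6 > /proc/sys/net/ipv4/vs/sync_ports	# rejected, -EINVAL
+ */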
+
+/*
+ *	IPVS sysctl table (under /proc/sys/net/ipv4/vs/)
+ *	Do not change the order or insert new entries without
+ *	aligning with the netns init in ip_vs_control_net_init()
+ */
+
+static struct ctl_table vs_vars[] = {
+ {
+ .procname = "amemthresh",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "am_droprate",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_entry",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+ {
+ .procname = "drop_packet",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+#ifdef CONFIG_IP_VS_NFCT
+ {
+ .procname = "conntrack",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+		.proc_handler	= proc_dointvec,
+ },
+#endif
+ {
+ .procname = "secure_tcp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_defense_mode,
+ },
+ {
+ .procname = "snat_reroute",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+		.proc_handler	= proc_dointvec,
+ },
+ {
+ .procname = "sync_version",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "sync_ports",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_sync_ports,
+ },
+ {
+ .procname = "sync_persist_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_qlen_max",
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "sync_sock_size",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "cache_bypass",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_nodest_conn",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sloppy_tcp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sloppy_sctp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_quiescent_template",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_threshold",
+ .maxlen =
+ sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+ .mode = 0644,
+ .proc_handler = proc_do_sync_threshold,
+ },
+ {
+ .procname = "sync_refresh_period",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "sync_retries",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_THREE,
+ },
+ {
+ .procname = "nat_icmp_send",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "pmtu_disc",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "backup_only",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "schedule_icmp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_tunneled",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "run_estimation",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IP_VS_DEBUG
+ {
+ .procname = "debug_level",
+ .data = &sysctl_ip_vs_debug_level,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ { }
+};
+
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+struct ip_vs_iter {
+	struct seq_net_private p;  /* Do not move this, netns depends upon it */
+ struct hlist_head *table;
+ int bucket;
+};
+
+/*
+ * Write the contents of the VS rule table to a PROCfs file.
+ * (It is kept just for backward compatibility)
+ */
+static inline const char *ip_vs_fwd_name(unsigned int flags)
+{
+ switch (flags & IP_VS_CONN_F_FWD_MASK) {
+ case IP_VS_CONN_F_LOCALNODE:
+ return "Local";
+ case IP_VS_CONN_F_TUNNEL:
+ return "Tunnel";
+ case IP_VS_CONN_F_DROUTE:
+ return "Route";
+ default:
+ return "Masq";
+ }
+}
+
+
+/* Get the Nth entry in the two lists */
+static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
+{
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_iter *iter = seq->private;
+ int idx;
+ struct ip_vs_service *svc;
+
+ /* look in hash by protocol */
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
+ iter->table = ip_vs_svc_table;
+ iter->bucket = idx;
+ return svc;
+ }
+ }
+ }
+
+ /* keep looking in fwmark */
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
+ f_list) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
+ iter->table = ip_vs_svc_fwm_table;
+ iter->bucket = idx;
+ return svc;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+
+static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct hlist_node *e;
+ struct ip_vs_iter *iter;
+ struct ip_vs_service *svc;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+		return ip_vs_info_array(seq, 0);
+
+ svc = v;
+ iter = seq->private;
+
+ if (iter->table == ip_vs_svc_table) {
+ /* next service in table hashed by protocol */
+ e = rcu_dereference(hlist_next_rcu(&svc->s_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, s_list);
+
+ while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_table[iter->bucket],
+ s_list) {
+ return svc;
+ }
+ }
+
+ iter->table = ip_vs_svc_fwm_table;
+ iter->bucket = -1;
+ goto scan_fwmark;
+ }
+
+	/* next service in the table hashed by fwmark */
+ e = rcu_dereference(hlist_next_rcu(&svc->f_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, f_list);
+
+ scan_fwmark:
+ while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_fwm_table[iter->bucket],
+ f_list)
+ return svc;
+ }
+
+ return NULL;
+}
+
+static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+
+static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq,
+ "IP Virtual Server version %d.%d.%d (size=%d)\n",
+ NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
+ seq_puts(seq,
+ "Prot LocalAddress:Port Scheduler Flags\n");
+ seq_puts(seq,
+ " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
+ } else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ const struct ip_vs_service *svc = v;
+ const struct ip_vs_iter *iter = seq->private;
+ const struct ip_vs_dest *dest;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
+
+ if (svc->ipvs != ipvs)
+ return 0;
+ if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ seq_printf(seq, "%s [%pI6]:%04X %s ",
+ ip_vs_proto_name(svc->protocol),
+ &svc->addr.in6,
+ ntohs(svc->port),
+ sched_name);
+ else
+#endif
+ seq_printf(seq, "%s %08X:%04X %s %s ",
+ ip_vs_proto_name(svc->protocol),
+ ntohl(svc->addr.ip),
+ ntohs(svc->port),
+ sched_name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET) ? "ops " : "");
+ } else {
+ seq_printf(seq, "FWM %08X %s %s",
+ svc->fwmark, sched_name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET) ? "ops " : "");
+ }
+
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ seq_printf(seq, "persistent %d %08X\n",
+ svc->timeout,
+ ntohl(svc->netmask));
+ else
+ seq_putc(seq, '\n');
+
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (dest->af == AF_INET6)
+ seq_printf(seq,
+ " -> [%pI6]:%04X"
+ " %-7s %-6d %-10d %-10d\n",
+ &dest->addr.in6,
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+ else
+#endif
+ seq_printf(seq,
+ " -> %08X:%04X "
+ "%-7s %-6d %-10d %-10d\n",
+ ntohl(dest->addr.ip),
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+
+ }
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_info_seq_ops = {
+ .start = ip_vs_info_seq_start,
+ .next = ip_vs_info_seq_next,
+ .stop = ip_vs_info_seq_stop,
+ .show = ip_vs_info_seq_show,
+};
+
+static int ip_vs_stats_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_single_net(seq);
+ struct ip_vs_kstats show;
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Total Incoming Outgoing Incoming Outgoing\n");
+ seq_puts(seq,
+ " Conns Packets Packets Bytes Bytes\n");
+
+ ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)show.conns,
+ (unsigned long long)show.inpkts,
+ (unsigned long long)show.outpkts,
+ (unsigned long long)show.inbytes,
+ (unsigned long long)show.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
+ seq_puts(seq,
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+ (unsigned long long)show.cps,
+ (unsigned long long)show.inpps,
+ (unsigned long long)show.outpps,
+ (unsigned long long)show.inbps,
+ (unsigned long long)show.outbps);
+
+ return 0;
+}
+
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_single_net(seq);
+ struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+ struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
+ struct ip_vs_kstats kstats;
+ int i;
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Total Incoming Outgoing Incoming Outgoing\n");
+ seq_puts(seq,
+ "CPU Conns Packets Packets Bytes Bytes\n");
+
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+ unsigned int start;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&u->syncp);
+ conns = u64_stats_read(&u->cnt.conns);
+ inpkts = u64_stats_read(&u->cnt.inpkts);
+ outpkts = u64_stats_read(&u->cnt.outpkts);
+ inbytes = u64_stats_read(&u->cnt.inbytes);
+ outbytes = u64_stats_read(&u->cnt.outbytes);
+ } while (u64_stats_fetch_retry_irq(&u->syncp, start));
+
+ seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+ i, (u64)conns, (u64)inpkts,
+ (u64)outpkts, (u64)inbytes,
+ (u64)outbytes);
+ }
+
+ ip_vs_copy_stats(&kstats, tot_stats);
+
+ seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)kstats.conns,
+ (unsigned long long)kstats.inpkts,
+ (unsigned long long)kstats.outpkts,
+ (unsigned long long)kstats.inbytes,
+ (unsigned long long)kstats.outbytes);
+
+/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
+ kstats.cps,
+ kstats.inpps,
+ kstats.outpps,
+ kstats.inbps,
+ kstats.outbps);
+
+ return 0;
+}
+#endif
+
+/*
+ * Set timeout values for tcp tcpfin udp in the timeout_table.
+ */
+static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
+{
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+ struct ip_vs_proto_data *pd;
+#endif
+
+ IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+ u->tcp_timeout,
+ u->tcp_fin_timeout,
+ u->udp_timeout);
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
+ u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
+ return -EINVAL;
+ }
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
+ return -EINVAL;
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ if (u->tcp_timeout) {
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
+ = u->tcp_timeout * HZ;
+ }
+
+ if (u->tcp_fin_timeout) {
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
+ = u->tcp_fin_timeout * HZ;
+ }
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ if (u->udp_timeout) {
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
+ pd->timeout_table[IP_VS_UDP_S_NORMAL]
+ = u->udp_timeout * HZ;
+ }
+#endif
+ return 0;
+}
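+
+/* Typically driven from userspace, e.g. with the standard ipvsadm tool
+ * (values in seconds; 0 leaves the corresponding timeout unchanged):
+ *
+ *	ipvsadm --set 900 120 300	# tcp tcpfin udp
+ */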
+
+#define CMDID(cmd) (cmd - IP_VS_BASE_CTL)
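+
+/* CMDID() turns a sockopt command into a dense index, e.g.
+ * CMDID(IP_VS_SO_SET_EDIT) == IP_VS_SO_SET_EDIT - IP_VS_BASE_CTL,
+ * which sizes and indexes the set_arglen[]/get_arglen[] tables below.
+ */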
+
+struct ip_vs_svcdest_user {
+ struct ip_vs_service_user s;
+ struct ip_vs_dest_user d;
+};
+
+static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user),
+};
+
+union ip_vs_set_arglen {
+ struct ip_vs_service_user field_IP_VS_SO_SET_ADD;
+ struct ip_vs_service_user field_IP_VS_SO_SET_EDIT;
+ struct ip_vs_service_user field_IP_VS_SO_SET_DEL;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST;
+ struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON;
+ struct ip_vs_service_user field_IP_VS_SO_SET_ZERO;
+};
+
+#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen)
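+
+/* MAX_SET_ARGLEN bounds the on-stack arg[] buffer in do_ip_vs_set_ctl();
+ * the union guarantees it is at least as large as every per-command
+ * payload listed in set_arglen[].
+ */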
+
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+ struct ip_vs_service_user *usvc_compat)
+{
+ memset(usvc, 0, sizeof(*usvc));
+
+ usvc->af = AF_INET;
+ usvc->protocol = usvc_compat->protocol;
+ usvc->addr.ip = usvc_compat->addr;
+ usvc->port = usvc_compat->port;
+ usvc->fwmark = usvc_compat->fwmark;
+
+ /* Deep copy of sched_name is not needed here */
+ usvc->sched_name = usvc_compat->sched_name;
+
+ usvc->flags = usvc_compat->flags;
+ usvc->timeout = usvc_compat->timeout;
+ usvc->netmask = usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+ struct ip_vs_dest_user *udest_compat)
+{
+ memset(udest, 0, sizeof(*udest));
+
+ udest->addr.ip = udest_compat->addr;
+ udest->port = udest_compat->port;
+ udest->conn_flags = udest_compat->conn_flags;
+ udest->weight = udest_compat->weight;
+ udest->u_threshold = udest_compat->u_threshold;
+ udest->l_threshold = udest_compat->l_threshold;
+ udest->af = AF_INET;
+ udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
+}
+
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
+{
+ struct net *net = sock_net(sk);
+ int ret;
+ unsigned char arg[MAX_SET_ARGLEN];
+ struct ip_vs_service_user *usvc_compat;
+ struct ip_vs_service_user_kern usvc;
+ struct ip_vs_service *svc;
+ struct ip_vs_dest_user *udest_compat;
+ struct ip_vs_dest_user_kern udest;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ BUILD_BUG_ON(sizeof(arg) > 255);
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
+ return -EINVAL;
+ if (len != set_arglen[CMDID(cmd)]) {
+ IP_VS_DBG(1, "set_ctl: len %u != %u\n",
+ len, set_arglen[CMDID(cmd)]);
+ return -EINVAL;
+ }
+
+ if (copy_from_sockptr(arg, ptr, len) != 0)
+ return -EFAULT;
+
+ /* Handle daemons since they have another lock */
+ if (cmd == IP_VS_SO_SET_STARTDAEMON ||
+ cmd == IP_VS_SO_SET_STOPDAEMON) {
+ struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+
+ if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+ struct ipvs_sync_daemon_cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ ret = -EINVAL;
+ if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
+ sizeof(cfg.mcast_ifn)) <= 0)
+ return ret;
+ cfg.syncid = dm->syncid;
+ ret = start_sync_thread(ipvs, &cfg, dm->state);
+ } else {
+ ret = stop_sync_thread(ipvs, dm->state);
+ }
+ return ret;
+ }
+
+ mutex_lock(&__ip_vs_mutex);
+ if (cmd == IP_VS_SO_SET_FLUSH) {
+ /* Flush the virtual service */
+ ret = ip_vs_flush(ipvs, false);
+ goto out_unlock;
+ } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
+ /* Set timeout values for (tcp tcpfin udp) */
+ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
+ goto out_unlock;
+ } else if (!len) {
+ /* No more commands with len == 0 below */
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ usvc_compat = (struct ip_vs_service_user *)arg;
+ udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+ /* We only use the new structs internally, so copy userspace compat
+ * structs to extended internal versions */
+ ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+ ip_vs_copy_udest_compat(&udest, udest_compat);
+
+ if (cmd == IP_VS_SO_SET_ZERO) {
+ /* if no service address is set, zero counters in all */
+ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
+ ret = ip_vs_zero_all(ipvs);
+ goto out_unlock;
+ }
+ }
+
+ if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+ strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+ IP_VS_SCHEDNAME_MAXLEN) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
+ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
+ usvc.protocol != IPPROTO_SCTP) {
+ pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
+ usvc.protocol, &usvc.addr.ip,
+ ntohs(usvc.port));
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ /* Lookup the exact service by <protocol, addr, port> or fwmark */
+ rcu_read_lock();
+ if (usvc.fwmark == 0)
+ svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
+ &usvc.addr, usvc.port);
+ else
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
+ rcu_read_unlock();
+
+	if (cmd != IP_VS_SO_SET_ADD &&
+	    (svc == NULL || svc->protocol != usvc.protocol)) {
+ ret = -ESRCH;
+ goto out_unlock;
+ }
+
+ switch (cmd) {
+ case IP_VS_SO_SET_ADD:
+ if (svc != NULL)
+ ret = -EEXIST;
+ else
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
+ break;
+ case IP_VS_SO_SET_EDIT:
+ ret = ip_vs_edit_service(svc, &usvc);
+ break;
+ case IP_VS_SO_SET_DEL:
+ ret = ip_vs_del_service(svc);
+ if (!ret)
+ goto out_unlock;
+ break;
+ case IP_VS_SO_SET_ZERO:
+ ret = ip_vs_zero_service(svc);
+ break;
+ case IP_VS_SO_SET_ADDDEST:
+ ret = ip_vs_add_dest(svc, &udest);
+ break;
+ case IP_VS_SO_SET_EDITDEST:
+ ret = ip_vs_edit_dest(svc, &udest);
+ break;
+ case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	}
+
+ out_unlock:
+ mutex_unlock(&__ip_vs_mutex);
+ return ret;
+}
+
+
+static void
+ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
+{
+ struct ip_vs_scheduler *sched;
+ struct ip_vs_kstats kstats;
+ char *sched_name;
+
+ sched = rcu_dereference_protected(src->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
+ dst->protocol = src->protocol;
+ dst->addr = src->addr.ip;
+ dst->port = src->port;
+ dst->fwmark = src->fwmark;
+ strscpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
+ dst->flags = src->flags;
+ dst->timeout = src->timeout / HZ;
+ dst->netmask = src->netmask;
+ dst->num_dests = src->num_dests;
+ ip_vs_copy_stats(&kstats, &src->stats);
+ ip_vs_export_stats_user(&dst->stats, &kstats);
+}
+
+static inline int
+__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
+ const struct ip_vs_get_services *get,
+ struct ip_vs_get_services __user *uptr)
+{
+	int idx, count = 0;
+ struct ip_vs_service *svc;
+ struct ip_vs_service_entry entry;
+ int ret = 0;
+
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ /* Only expose IPv4 entries to old interface */
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
+ continue;
+
+ if (count >= get->num_services)
+ goto out;
+ memset(&entry, 0, sizeof(entry));
+ ip_vs_copy_service(&entry, svc);
+ if (copy_to_user(&uptr->entrytable[count],
+ &entry, sizeof(entry))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ count++;
+ }
+ }
+
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ /* Only expose IPv4 entries to old interface */
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
+ continue;
+
+ if (count >= get->num_services)
+ goto out;
+ memset(&entry, 0, sizeof(entry));
+ ip_vs_copy_service(&entry, svc);
+ if (copy_to_user(&uptr->entrytable[count],
+ &entry, sizeof(entry))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ count++;
+ }
+ }
+out:
+ return ret;
+}
+
+static inline int
+__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
+ struct ip_vs_get_dests __user *uptr)
+{
+ struct ip_vs_service *svc;
+ union nf_inet_addr addr = { .ip = get->addr };
+ int ret = 0;
+
+ rcu_read_lock();
+ if (get->fwmark)
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
+ else
+ svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
+ get->port);
+ rcu_read_unlock();
+
+ if (svc) {
+ int count = 0;
+ struct ip_vs_dest *dest;
+ struct ip_vs_dest_entry entry;
+ struct ip_vs_kstats kstats;
+
+ memset(&entry, 0, sizeof(entry));
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ if (count >= get->num_dests)
+ break;
+
+ /* Cannot expose heterogeneous members via sockopt
+ * interface
+ */
+ if (dest->af != svc->af)
+ continue;
+
+ entry.addr = dest->addr.ip;
+ entry.port = dest->port;
+ entry.conn_flags = atomic_read(&dest->conn_flags);
+ entry.weight = atomic_read(&dest->weight);
+ entry.u_threshold = dest->u_threshold;
+ entry.l_threshold = dest->l_threshold;
+ entry.activeconns = atomic_read(&dest->activeconns);
+ entry.inactconns = atomic_read(&dest->inactconns);
+ entry.persistconns = atomic_read(&dest->persistconns);
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ ip_vs_export_stats_user(&entry.stats, &kstats);
+ if (copy_to_user(&uptr->entrytable[count],
+ &entry, sizeof(entry))) {
+ ret = -EFAULT;
+ break;
+ }
+ count++;
+ }
+ } else
+ ret = -ESRCH;
+ return ret;
+}
+
+static inline void
+__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
+{
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+ struct ip_vs_proto_data *pd;
+#endif
+
+	memset(u, 0, sizeof(*u));
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
+ u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+ u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
+ u->udp_timeout =
+ pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+#endif
+}
+
+static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_GET_VERSION)] = 64,
+ [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo),
+ [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
+ [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry),
+ [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests),
+ [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user),
+};
+
+union ip_vs_get_arglen {
+ char field_IP_VS_SO_GET_VERSION[64];
+ struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
+ struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
+ struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
+ struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
+ struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
+};
+
+#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen)
+
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ unsigned char arg[MAX_GET_ARGLEN];
+ int ret = 0;
+ unsigned int copylen;
+ struct net *net = sock_net(sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ BUG_ON(!net);
+ BUILD_BUG_ON(sizeof(arg) > 255);
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
+ return -EINVAL;
+
+ copylen = get_arglen[CMDID(cmd)];
+ if (*len < (int) copylen) {
+ IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
+ return -EINVAL;
+ }
+
+ if (copy_from_user(arg, user, copylen) != 0)
+ return -EFAULT;
+ /*
+	 * Handle daemons first since they have their own locking
+ */
+ if (cmd == IP_VS_SO_GET_DAEMON) {
+ struct ip_vs_daemon_user d[2];
+
+ memset(&d, 0, sizeof(d));
+ mutex_lock(&ipvs->sync_mutex);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {
+ d[0].state = IP_VS_STATE_MASTER;
+ strscpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
+ sizeof(d[0].mcast_ifn));
+ d[0].syncid = ipvs->mcfg.syncid;
+ }
+ if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
+ d[1].state = IP_VS_STATE_BACKUP;
+ strscpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
+ sizeof(d[1].mcast_ifn));
+ d[1].syncid = ipvs->bcfg.syncid;
+ }
+ if (copy_to_user(user, &d, sizeof(d)) != 0)
+ ret = -EFAULT;
+ mutex_unlock(&ipvs->sync_mutex);
+ return ret;
+ }
+
+ mutex_lock(&__ip_vs_mutex);
+ switch (cmd) {
+ case IP_VS_SO_GET_VERSION:
+ {
+ char buf[64];
+
+ sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
+ NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
+ if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ *len = strlen(buf)+1;
+ }
+ break;
+
+ case IP_VS_SO_GET_INFO:
+ {
+ struct ip_vs_getinfo info;
+ info.version = IP_VS_VERSION_CODE;
+ info.size = ip_vs_conn_tab_size;
+ info.num_services = ipvs->num_services;
+ if (copy_to_user(user, &info, sizeof(info)) != 0)
+ ret = -EFAULT;
+ }
+ break;
+
+ case IP_VS_SO_GET_SERVICES:
+ {
+ struct ip_vs_get_services *get;
+ int size;
+
+ get = (struct ip_vs_get_services *)arg;
+ size = struct_size(get, entrytable, get->num_services);
+ if (*len != size) {
+ pr_err("length: %u != %u\n", *len, size);
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = __ip_vs_get_service_entries(ipvs, get, user);
+ }
+ break;
+
+ case IP_VS_SO_GET_SERVICE:
+ {
+ struct ip_vs_service_entry *entry;
+ struct ip_vs_service *svc;
+ union nf_inet_addr addr;
+
+ entry = (struct ip_vs_service_entry *)arg;
+ addr.ip = entry->addr;
+ rcu_read_lock();
+ if (entry->fwmark)
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
+ else
+ svc = __ip_vs_service_find(ipvs, AF_INET,
+ entry->protocol, &addr,
+ entry->port);
+ rcu_read_unlock();
+ if (svc) {
+ ip_vs_copy_service(entry, svc);
+ if (copy_to_user(user, entry, sizeof(*entry)) != 0)
+ ret = -EFAULT;
+ } else
+ ret = -ESRCH;
+ }
+ break;
+
+ case IP_VS_SO_GET_DESTS:
+ {
+ struct ip_vs_get_dests *get;
+ int size;
+
+ get = (struct ip_vs_get_dests *)arg;
+ size = struct_size(get, entrytable, get->num_dests);
+ if (*len != size) {
+ pr_err("length: %u != %u\n", *len, size);
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = __ip_vs_get_dest_entries(ipvs, get, user);
+ }
+ break;
+
+ case IP_VS_SO_GET_TIMEOUT:
+ {
+ struct ip_vs_timeout_user t;
+
+ __ip_vs_get_timeouts(ipvs, &t);
+ if (copy_to_user(user, &t, sizeof(t)) != 0)
+ ret = -EFAULT;
+ }
+ break;
+
+ default:
+ ret = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&__ip_vs_mutex);
+ return ret;
+}
+
+
+static struct nf_sockopt_ops ip_vs_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = IP_VS_BASE_CTL,
+ .set_optmax = IP_VS_SO_SET_MAX+1,
+ .set = do_ip_vs_set_ctl,
+ .get_optmin = IP_VS_BASE_CTL,
+ .get_optmax = IP_VS_SO_GET_MAX+1,
+ .get = do_ip_vs_get_ctl,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family;
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+ [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+ [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_IFNAME_MAXLEN - 1 },
+ [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
+ [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+ [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
+ [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_PENAME_MAXLEN },
+ [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
+ .len = sizeof(struct ip_vs_flags) },
+ [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+ [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+ [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
+ [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
+};
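+
+/*
+ * Userspace sketch (editor's illustration, assuming the libnl-3 API):
+ * the nested layout that the policies above validate.  A service is
+ * sent as one IPVS_CMD_ATTR_SERVICE nest holding per-field attributes:
+ *
+ *	struct nlattr *svc = nla_nest_start(msg, IPVS_CMD_ATTR_SERVICE);
+ *
+ *	nla_put_u16(msg, IPVS_SVC_ATTR_AF, AF_INET);
+ *	nla_put_u16(msg, IPVS_SVC_ATTR_PROTOCOL, IPPROTO_TCP);
+ *	// ... IPVS_SVC_ATTR_ADDR, _PORT, _SCHED_NAME, _FLAGS, ...
+ *	nla_nest_end(msg, svc);
+ */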
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
+ IPVS_STATS_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
+ IPVS_STATS_ATTR_PAD))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+ struct ip_vs_service *svc)
+{
+ struct ip_vs_scheduler *sched;
+ struct ip_vs_pe *pe;
+ struct nlattr *nl_service;
+ struct ip_vs_flags flags = { .flags = svc->flags,
+ .mask = ~0 };
+ struct ip_vs_kstats kstats;
+ char *sched_name;
+
+ nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
+ if (!nl_service)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
+ goto nla_put_failure;
+ if (svc->fwmark) {
+ if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
+ nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
+ nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
+ goto nla_put_failure;
+ }
+
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
+ pe = rcu_dereference_protected(svc->pe, 1);
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
+ (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
+ nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
+ nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
+ nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
+ goto nla_put_failure;
+ ip_vs_copy_stats(&kstats, &svc->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nl_service);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_service);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+ struct ip_vs_service *svc,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_SERVICE);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_service(skb, svc) < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ int idx = 0, i;
+ int start = cb->args[0];
+ struct ip_vs_service *svc;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&__ip_vs_mutex);
+ for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+ if (++idx <= start || (svc->ipvs != ipvs))
+ continue;
+ if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+ }
+
+ for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+ if (++idx <= start || (svc->ipvs != ipvs))
+ continue;
+ if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+ }
+
+nla_put_failure:
+ mutex_unlock(&__ip_vs_mutex);
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+static bool ip_vs_is_af_valid(int af)
+{
+ if (af == AF_INET)
+ return true;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6 && ipv6_mod_enabled())
+ return true;
+#endif
+ return false;
+}
+
+static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
+ struct ip_vs_service_user_kern *usvc,
+ struct nlattr *nla, bool full_entry,
+ struct ip_vs_service **ret_svc)
+{
+ struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+ struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+ struct ip_vs_service *svc;
+
+ /* Parse mandatory identifying service fields first */
+ if (nla == NULL ||
+ nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
+ return -EINVAL;
+
+ nla_af = attrs[IPVS_SVC_ATTR_AF];
+ nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
+ nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
+ nla_port = attrs[IPVS_SVC_ATTR_PORT];
+ nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
+
+ if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+ return -EINVAL;
+
+ memset(usvc, 0, sizeof(*usvc));
+
+ usvc->af = nla_get_u16(nla_af);
+ if (!ip_vs_is_af_valid(usvc->af))
+ return -EAFNOSUPPORT;
+
+ if (nla_fwmark) {
+ usvc->protocol = IPPROTO_TCP;
+ usvc->fwmark = nla_get_u32(nla_fwmark);
+ } else {
+ usvc->protocol = nla_get_u16(nla_protocol);
+ nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+ usvc->port = nla_get_be16(nla_port);
+ usvc->fwmark = 0;
+ }
+
+ rcu_read_lock();
+ if (usvc->fwmark)
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
+ else
+ svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
+ &usvc->addr, usvc->port);
+ rcu_read_unlock();
+ *ret_svc = svc;
+
+ /* If a full entry was requested, check for the additional fields */
+ if (full_entry) {
+ struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
+ *nla_netmask;
+ struct ip_vs_flags flags;
+
+ nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+ nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
+ nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+ nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+ nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+ if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+ return -EINVAL;
+
+ nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+ /* prefill flags from service if it already exists */
+ if (svc)
+ usvc->flags = svc->flags;
+
+ /* set new flags from userland */
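+		/* (e.g. a mask with only IP_VS_SVC_F_PERSISTENT set
+		 * updates just that bit and keeps all other flags)
+		 */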
+ usvc->flags = (usvc->flags & ~flags.mask) |
+ (flags.flags & flags.mask);
+ usvc->sched_name = nla_data(nla_sched);
+ usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
+ usvc->timeout = nla_get_u32(nla_timeout);
+ usvc->netmask = nla_get_be32(nla_netmask);
+ }
+
+ return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
+ struct nlattr *nla)
+{
+ struct ip_vs_service_user_kern usvc;
+ struct ip_vs_service *svc;
+ int ret;
+
+ ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
+ return ret ? ERR_PTR(ret) : svc;
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+ struct nlattr *nl_dest;
+ struct ip_vs_kstats kstats;
+
+ nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
+ if (!nl_dest)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
+ nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+ (atomic_read(&dest->conn_flags) &
+ IP_VS_CONN_F_FWD_MASK)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
+ atomic_read(&dest->weight)) ||
+ nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
+ dest->tun_type) ||
+ nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
+ dest->tun_port) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
+ dest->tun_flags) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+ atomic_read(&dest->activeconns)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+ atomic_read(&dest->inactconns)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+ atomic_read(&dest->persistconns)) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
+ goto nla_put_failure;
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nl_dest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_dest);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_DEST);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_dest(skb, dest) < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ int idx = 0;
+ int start = cb->args[0];
+ struct ip_vs_service *svc;
+ struct ip_vs_dest *dest;
+ struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&__ip_vs_mutex);
+
+ /* Try to find the service for which to dump destinations */
+ if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
+ goto out_err;
+
+
+ svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
+ if (IS_ERR_OR_NULL(svc))
+ goto out_err;
+
+ /* Dump the destinations */
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ if (++idx <= start)
+ continue;
+ if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+
+nla_put_failure:
+ cb->args[0] = idx;
+
+out_err:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
+ struct nlattr *nla, bool full_entry)
+{
+ struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+ struct nlattr *nla_addr, *nla_port;
+ struct nlattr *nla_addr_family;
+
+ /* Parse mandatory identifying destination fields first */
+ if (nla == NULL ||
+ nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
+ return -EINVAL;
+
+ nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
+ nla_port = attrs[IPVS_DEST_ATTR_PORT];
+ nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
+
+ if (!(nla_addr && nla_port))
+ return -EINVAL;
+
+ memset(udest, 0, sizeof(*udest));
+
+ nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+ udest->port = nla_get_be16(nla_port);
+
+ if (nla_addr_family)
+ udest->af = nla_get_u16(nla_addr_family);
+ else
+ udest->af = 0;
+
+ /* If a full entry was requested, check for the additional fields */
+ if (full_entry) {
+ struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+ *nla_l_thresh, *nla_tun_type, *nla_tun_port,
+ *nla_tun_flags;
+
+ nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
+ nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
+ nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
+ nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
+ nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
+ nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
+ nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
+
+ if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+ return -EINVAL;
+
+ udest->conn_flags = nla_get_u32(nla_fwd)
+ & IP_VS_CONN_F_FWD_MASK;
+ udest->weight = nla_get_u32(nla_weight);
+ udest->u_threshold = nla_get_u32(nla_u_thresh);
+ udest->l_threshold = nla_get_u32(nla_l_thresh);
+
+ if (nla_tun_type)
+ udest->tun_type = nla_get_u8(nla_tun_type);
+
+ if (nla_tun_port)
+ udest->tun_port = nla_get_be16(nla_tun_port);
+
+ if (nla_tun_flags)
+ udest->tun_flags = nla_get_u16(nla_tun_flags);
+ }
+
+ return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
+ struct ipvs_sync_daemon_cfg *c)
+{
+ struct nlattr *nl_daemon;
+
+ nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
+ if (!nl_daemon)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
+ nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
+ nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
+ nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
+ goto nla_put_failure;
+#ifdef CONFIG_IP_VS_IPV6
+ if (c->mcast_af == AF_INET6) {
+ if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
+ &c->mcast_group.in6))
+ goto nla_put_failure;
+ } else
+#endif
+ if (c->mcast_af == AF_INET &&
+ nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
+ c->mcast_group.ip))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_daemon);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_daemon);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
+ struct ipvs_sync_daemon_cfg *c,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_DAEMON);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_daemon(skb, state, c))
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&ipvs->sync_mutex);
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+ &ipvs->mcfg, cb) < 0)
+ goto nla_put_failure;
+
+ cb->args[0] = 1;
+ }
+
+ if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+ &ipvs->bcfg, cb) < 0)
+ goto nla_put_failure;
+
+ cb->args[1] = 1;
+ }
+
+nla_put_failure:
+ mutex_unlock(&ipvs->sync_mutex);
+
+ return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
+{
+ struct ipvs_sync_daemon_cfg c;
+ struct nlattr *a;
+ int ret;
+
+ memset(&c, 0, sizeof(c));
+ if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+ attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+ attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+ return -EINVAL;
+ strscpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ sizeof(c.mcast_ifn));
+ c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
+
+ a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
+ if (a)
+ c.sync_maxlen = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
+ if (a) {
+ c.mcast_af = AF_INET;
+ c.mcast_group.ip = nla_get_in_addr(a);
+ if (!ipv4_is_multicast(c.mcast_group.ip))
+ return -EINVAL;
+ } else {
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
+ if (a) {
+#ifdef CONFIG_IP_VS_IPV6
+ int addr_type;
+
+ c.mcast_af = AF_INET6;
+ c.mcast_group.in6 = nla_get_in6_addr(a);
+ addr_type = ipv6_addr_type(&c.mcast_group.in6);
+ if (!(addr_type & IPV6_ADDR_MULTICAST))
+ return -EINVAL;
+#else
+ return -EAFNOSUPPORT;
+#endif
+ }
+ }
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
+ if (a)
+ c.mcast_port = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
+ if (a)
+ c.mcast_ttl = nla_get_u8(a);
+
+ /* The synchronization protocol is incompatible with mixed family
+ * services
+ */
+ if (ipvs->mixed_address_family_dests > 0)
+ return -EINVAL;
+
+ ret = start_sync_thread(ipvs, &c,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return ret;
+}
+
+static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
+{
+ int ret;
+
+ if (!attrs[IPVS_DAEMON_ATTR_STATE])
+ return -EINVAL;
+
+ ret = stop_sync_thread(ipvs,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return ret;
+}
+
+static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
+{
+ struct ip_vs_timeout_user t;
+
+ __ip_vs_get_timeouts(ipvs, &t);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+ t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+ t.tcp_fin_timeout =
+ nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+ t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+ return ip_vs_set_timeout(ipvs, &t);
+}
+
+static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret = -EINVAL, cmd;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ cmd = info->genlhdr->cmd;
+
+ if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
+ struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+ if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+ nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
+ goto out;
+
+ if (cmd == IPVS_CMD_NEW_DAEMON)
+ ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
+ else
+ ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
+ }
+
+out:
+ return ret;
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ bool need_full_svc = false, need_full_dest = false;
+ struct ip_vs_service *svc = NULL;
+ struct ip_vs_service_user_kern usvc;
+ struct ip_vs_dest_user_kern udest;
+ int ret = 0, cmd;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ cmd = info->genlhdr->cmd;
+
+ mutex_lock(&__ip_vs_mutex);
+
+ if (cmd == IPVS_CMD_FLUSH) {
+ ret = ip_vs_flush(ipvs, false);
+ goto out;
+ } else if (cmd == IPVS_CMD_SET_CONFIG) {
+ ret = ip_vs_genl_set_config(ipvs, info->attrs);
+ goto out;
+ } else if (cmd == IPVS_CMD_ZERO &&
+ !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+ ret = ip_vs_zero_all(ipvs);
+ goto out;
+ }
+
+ /* All following commands require a service argument, so check if we
+ * received a valid one. We need a full service specification when
+	 * adding / editing a service; only the identifying members otherwise. */
+ if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+ need_full_svc = true;
+
+ ret = ip_vs_genl_parse_service(ipvs, &usvc,
+ info->attrs[IPVS_CMD_ATTR_SERVICE],
+ need_full_svc, &svc);
+ if (ret)
+ goto out;
+
+ /* Unless we're adding a new service, the service must already exist */
+ if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+ ret = -ESRCH;
+ goto out;
+ }
+
+ /* Destination commands require a valid destination argument. For
+ * adding / editing a destination, we need a full destination
+ * specification. */
+ if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+ cmd == IPVS_CMD_DEL_DEST) {
+ if (cmd != IPVS_CMD_DEL_DEST)
+ need_full_dest = true;
+
+ ret = ip_vs_genl_parse_dest(&udest,
+ info->attrs[IPVS_CMD_ATTR_DEST],
+ need_full_dest);
+ if (ret)
+ goto out;
+
+ /* Old protocols did not allow the user to specify address
+ * family, so we set it to zero instead. We also didn't
+ * allow heterogeneous pools in the old code, so it's safe
+ * to assume that this will have the same address family as
+ * the service.
+ */
+ if (udest.af == 0)
+ udest.af = svc->af;
+
+ if (!ip_vs_is_af_valid(udest.af)) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
+ if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
+ /* The synchronization protocol is incompatible
+ * with mixed family services
+ */
+ if (ipvs->sync_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Which connection types do we support? */
+ switch (udest.conn_flags) {
+ case IP_VS_CONN_F_TUNNEL:
+ /* We are able to forward this */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ switch (cmd) {
+ case IPVS_CMD_NEW_SERVICE:
+ if (svc == NULL)
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
+ else
+ ret = -EEXIST;
+ break;
+ case IPVS_CMD_SET_SERVICE:
+ ret = ip_vs_edit_service(svc, &usvc);
+ break;
+ case IPVS_CMD_DEL_SERVICE:
+ ret = ip_vs_del_service(svc);
+ /* do not use svc, it can be freed */
+ break;
+ case IPVS_CMD_NEW_DEST:
+ ret = ip_vs_add_dest(svc, &udest);
+ break;
+ case IPVS_CMD_SET_DEST:
+ ret = ip_vs_edit_dest(svc, &udest);
+ break;
+ case IPVS_CMD_DEL_DEST:
+ ret = ip_vs_del_dest(svc, &udest);
+ break;
+ case IPVS_CMD_ZERO:
+ ret = ip_vs_zero_service(svc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *reply;
+ int ret, cmd, reply_cmd;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ cmd = info->genlhdr->cmd;
+
+ if (cmd == IPVS_CMD_GET_SERVICE)
+ reply_cmd = IPVS_CMD_NEW_SERVICE;
+ else if (cmd == IPVS_CMD_GET_INFO)
+ reply_cmd = IPVS_CMD_SET_INFO;
+ else if (cmd == IPVS_CMD_GET_CONFIG)
+ reply_cmd = IPVS_CMD_SET_CONFIG;
+ else {
+ pr_err("unknown Generic Netlink command\n");
+ return -EINVAL;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&__ip_vs_mutex);
+
+ reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+ if (reply == NULL)
+ goto nla_put_failure;
+
+ switch (cmd) {
+ case IPVS_CMD_GET_SERVICE:
+ {
+ struct ip_vs_service *svc;
+
+ svc = ip_vs_genl_find_service(ipvs,
+ info->attrs[IPVS_CMD_ATTR_SERVICE]);
+ if (IS_ERR(svc)) {
+ ret = PTR_ERR(svc);
+ goto out_err;
+ } else if (svc) {
+ ret = ip_vs_genl_fill_service(msg, svc);
+ if (ret)
+ goto nla_put_failure;
+ } else {
+ ret = -ESRCH;
+ goto out_err;
+ }
+
+ break;
+ }
+
+ case IPVS_CMD_GET_CONFIG:
+ {
+ struct ip_vs_timeout_user t;
+
+ __ip_vs_get_timeouts(ipvs, &t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
+ t.tcp_timeout) ||
+ nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+ t.tcp_fin_timeout))
+ goto nla_put_failure;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
+ goto nla_put_failure;
+#endif
+
+ break;
+ }
+
+ case IPVS_CMD_GET_INFO:
+ if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
+ IP_VS_VERSION_CODE) ||
+ nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+ ip_vs_conn_tab_size))
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(msg, reply);
+ ret = genlmsg_reply(msg, info);
+ goto out;
+
+nla_put_failure:
+ pr_err("not enough space in Netlink message\n");
+ ret = -EMSGSIZE;
+
+out_err:
+ nlmsg_free(msg);
+out:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return ret;
+}
+
+
+static const struct genl_small_ops ip_vs_genl_ops[] = {
+ {
+ .cmd = IPVS_CMD_NEW_SERVICE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_SET_SERVICE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_SERVICE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_SERVICE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ .dumpit = ip_vs_genl_dump_services,
+ },
+ {
+ .cmd = IPVS_CMD_NEW_DEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_SET_DEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_DEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_DEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .dumpit = ip_vs_genl_dump_dests,
+ },
+ {
+ .cmd = IPVS_CMD_NEW_DAEMON,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_daemon,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_DAEMON,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_daemon,
+ },
+ {
+ .cmd = IPVS_CMD_GET_DAEMON,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .dumpit = ip_vs_genl_dump_daemons,
+ },
+ {
+ .cmd = IPVS_CMD_SET_CONFIG,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_CONFIG,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_INFO,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_ZERO,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+};
+
+static struct genl_family ip_vs_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = IPVS_GENL_NAME,
+ .version = IPVS_GENL_VERSION,
+ .maxattr = IPVS_CMD_ATTR_MAX,
+ .policy = ip_vs_cmd_policy,
+	.netnsok	= true,		/* Make ipvsadm work in netns */
+ .module = THIS_MODULE,
+ .small_ops = ip_vs_genl_ops,
+ .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops),
+ .resv_start_op = IPVS_CMD_FLUSH + 1,
+};
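+
+/*
+ * Minimal userspace sketch (editor's note, libnl-3 assumed): resolving
+ * the numeric id of the family registered above before sending any of
+ * the IPVS_CMD_* requests:
+ *
+ *	#include <netlink/netlink.h>
+ *	#include <netlink/genl/genl.h>
+ *	#include <netlink/genl/ctrl.h>
+ *
+ *	struct nl_sock *sk = nl_socket_alloc();
+ *
+ *	if (sk && genl_connect(sk) == 0) {
+ *		int family = genl_ctrl_resolve(sk, IPVS_GENL_NAME);
+ *		// family is the nlmsg_type to use for IPVS requests
+ *	}
+ *	nl_socket_free(sk);
+ */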
+
+static int __init ip_vs_genl_register(void)
+{
+ return genl_register_family(&ip_vs_genl_family);
+}
+
+static void ip_vs_genl_unregister(void)
+{
+ genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
+/*
+ * per netns init/exit functions
+ */
+#ifdef CONFIG_SYSCTL
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
+{
+ struct net *net = ipvs->net;
+ int idx;
+ struct ctl_table *tbl;
+
+ atomic_set(&ipvs->dropentry, 0);
+ spin_lock_init(&ipvs->dropentry_lock);
+ spin_lock_init(&ipvs->droppacket_lock);
+ spin_lock_init(&ipvs->securetcp_lock);
+
+ if (!net_eq(net, &init_net)) {
+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+ if (tbl == NULL)
+ return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ tbl[0].procname = NULL;
+ } else
+ tbl = vs_vars;
+ /* Initialize sysctl defaults */
+ for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
+ if (tbl[idx].proc_handler == proc_do_defense_mode)
+ tbl[idx].extra2 = ipvs;
+ }
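+	/* The assignments below are positional: each tbl[idx++].data must
+	 * stay in the same order as the matching vs_vars[] entry.
+	 */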
+ idx = 0;
+ ipvs->sysctl_amemthresh = 1024;
+ tbl[idx++].data = &ipvs->sysctl_amemthresh;
+ ipvs->sysctl_am_droprate = 10;
+ tbl[idx++].data = &ipvs->sysctl_am_droprate;
+ tbl[idx++].data = &ipvs->sysctl_drop_entry;
+ tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+ ipvs->sysctl_snat_reroute = 1;
+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+ ipvs->sysctl_sync_ver = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ ipvs->sysctl_sync_ports = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ports;
+ tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
+ ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+ ipvs->sysctl_sync_sock_size = 0;
+ tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+ tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
+ tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+ ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+ ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx].extra2 = ipvs;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
+ tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+ ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
+ tbl[idx++].data = &ipvs->sysctl_sync_retries;
+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+ ipvs->sysctl_pmtu_disc = 1;
+ tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
+ tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
+ tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
+ tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+ ipvs->sysctl_run_estimation = 1;
+ tbl[idx++].data = &ipvs->sysctl_run_estimation;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
+
+ ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
+ if (ipvs->sysctl_hdr == NULL) {
+ if (!net_eq(net, &init_net))
+ kfree(tbl);
+ return -ENOMEM;
+ }
+ ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
+ ipvs->sysctl_tbl = tbl;
+ /* Schedule defense work */
+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
+
+ /* Init delayed work for expiring no dest conn */
+ INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
+ expire_nodest_conn_handler);
+
+ return 0;
+}
+
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
+{
+ struct net *net = ipvs->net;
+
+ cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->sysctl_tbl);
+}
+
+#else
+
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
+
+#endif
+
+static struct notifier_block ip_vs_dst_notifier = {
+ .notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+ .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
+};
+
+int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
+{
+ int i, idx;
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
+
+ INIT_LIST_HEAD(&ipvs->dest_trash);
+ spin_lock_init(&ipvs->dest_trash_lock);
+ timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
+ atomic_set(&ipvs->ftpsvc_counter, 0);
+ atomic_set(&ipvs->nullsvc_counter, 0);
+ atomic_set(&ipvs->conn_out_counter, 0);
+
+ /* procfs stats */
+ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!ipvs->tot_stats.cpustats)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *ipvs_tot_stats;
+ ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
+ u64_stats_init(&ipvs_tot_stats->syncp);
+ }
+
+ spin_lock_init(&ipvs->tot_stats.lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net,
+ &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)))
+ goto err_vs;
+ if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
+ ip_vs_stats_show, NULL))
+ goto err_stats;
+ if (!proc_create_net_single("ip_vs_stats_percpu", 0,
+ ipvs->net->proc_net,
+ ip_vs_stats_percpu_show, NULL))
+ goto err_percpu;
+#endif
+
+ if (ip_vs_control_net_init_sysctl(ipvs))
+ goto err;
+
+ return 0;
+
+err:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+
+err_percpu:
+ remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+
+err_stats:
+ remove_proc_entry("ip_vs", ipvs->net->proc_net);
+
+err_vs:
+#endif
+ free_percpu(ipvs->tot_stats.cpustats);
+ return -ENOMEM;
+}
+
+void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
+{
+ ip_vs_trash_cleanup(ipvs);
+ ip_vs_control_net_cleanup_sysctl(ipvs);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs", ipvs->net->proc_net);
+#endif
+ free_percpu(ipvs->tot_stats.cpustats);
+}
+
+int __init ip_vs_register_nl_ioctl(void)
+{
+ int ret;
+
+ ret = nf_register_sockopt(&ip_vs_sockopts);
+ if (ret) {
+ pr_err("cannot register sockopt.\n");
+ goto err_sock;
+ }
+
+ ret = ip_vs_genl_register();
+ if (ret) {
+ pr_err("cannot register Generic Netlink interface.\n");
+ goto err_genl;
+ }
+ return 0;
+
+err_genl:
+ nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
+ return ret;
+}
+
+void ip_vs_unregister_nl_ioctl(void)
+{
+ ip_vs_genl_unregister();
+ nf_unregister_sockopt(&ip_vs_sockopts);
+}
+
+int __init ip_vs_control_init(void)
+{
+ int idx;
+ int ret;
+
+ EnterFunction(2);
+
+ /* Initialize svc_table, ip_vs_svc_fwm_table */
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
+ INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+ }
+
+	smp_wmb(); /* Do we really need it now? */
+
+ ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+ if (ret < 0)
+ return ret;
+
+ LeaveFunction(2);
+ return 0;
+}
+
+
+void ip_vs_control_cleanup(void)
+{
+ EnterFunction(2);
+ unregister_netdevice_notifier(&ip_vs_dst_notifier);
+ LeaveFunction(2);
+}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
new file mode 100644
index 000000000..5e6ec32af
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Destination Hashing scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * Inspired by the consistent hashing scheduler patch from
+ * Thomas Proell <proellt@gmx.de>
+ *
+ * Changes:
+ */
+
+/*
+ * The dh algorithm selects a server by hashing the destination IP
+ * address. The pseudo code is as follows:
+ *
+ * n <- servernode[dest_ip];
+ * if (n is dead) OR
+ * (n is overloaded) OR (n.weight <= 0) then
+ * return NULL;
+ *
+ * return n;
+ *
+ * Note that servernode is a 256-bucket hash table that maps the hash
+ * index derived from the packet's destination IP address to the current
+ * server array. If the dh scheduler is used in a cache cluster, it is
+ * good to combine it with the cache_bypass feature. When the statically
+ * assigned server is dead or overloaded, the load balancer can bypass
+ * the cache server and send requests to the original server directly.
+ *
+ */
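+
+/*
+ * Worked example (editor's sketch, mirroring ip_vs_dh_hashkey() below
+ * rather than adding new logic): with the default 8-bit table an IPv4
+ * destination selects bucket hash_32(ntohl(addr->ip), 8), one of 256
+ * slots; an IPv6 address is XOR-folded into 32 bits first:
+ *
+ *	__be32 fold = a->ip6[0] ^ a->ip6[1] ^ a->ip6[2] ^ a->ip6[3];
+ *	unsigned int bucket = hash_32(ntohl(fold), 8);	// 0..255
+ */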
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/hash.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS DH bucket
+ */
+struct ip_vs_dh_bucket {
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
+};
+
+/*
+ * for IPVS DH entry hash table
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS 8
+#endif
+#define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
+
+struct ip_vs_dh_state {
+ struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
+ struct rcu_head rcu_head;
+};
+
+/*
+ * Returns hash value for IPVS DH entry
+ */
+static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
+{
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+ return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
+}
+
+
+/*
+ * Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
+{
+ return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
+}
+
+
+/*
+ * Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
+{
+ int i;
+ struct ip_vs_dh_bucket *b;
+ struct list_head *p;
+ struct ip_vs_dest *dest;
+ bool empty;
+
+ b = &s->buckets[0];
+ p = &svc->destinations;
+ empty = list_empty(p);
+ for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
+ if (p == &svc->destinations)
+ p = p->next;
+
+ dest = list_entry(p, struct ip_vs_dest, n_list);
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
+
+ p = p->next;
+ }
+ b++;
+ }
+ return 0;
+}
+
+
+/*
+ * Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
+{
+ int i;
+ struct ip_vs_dh_bucket *b;
+ struct ip_vs_dest *dest;
+
+ b = &s->buckets[0];
+ for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
+ }
+ b++;
+ }
+}
+
+
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_dh_state *s;
+
+ /* allocate the DH table for this service */
+ s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
+ if (s == NULL)
+ return -ENOMEM;
+
+ svc->sched_data = s;
+ IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
+ "current service\n",
+ sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+ /* assign the hash buckets with current dests */
+ ip_vs_dh_reassign(s, svc);
+
+ return 0;
+}
+
+
+static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_dh_state *s = svc->sched_data;
+
+	/* we have to clean up the hash buckets here */
+ ip_vs_dh_flush(s);
+
+ /* release the table itself */
+ kfree_rcu(s, rcu_head);
+ IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
+ sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+}
+
+
+static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_dh_state *s = svc->sched_data;
+
+ /* assign the hash buckets with the updated service */
+ ip_vs_dh_reassign(s, svc);
+
+ return 0;
+}
+
+
+/*
+ * If the dest flags have IP_VS_DEST_F_OVERLOAD set, consider that
+ * the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+ return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * Destination hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_dh_state *s;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ s = (struct ip_vs_dh_state *) svc->sched_data;
+ dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
+	if (!dest ||
+	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+	    atomic_read(&dest->weight) <= 0 ||
+	    is_overloaded(dest)) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
+ IP_VS_DBG_ADDR(svc->af, &iph->daddr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port));
+
+ return dest;
+}
+
+
+/*
+ * IPVS DH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler = {
+ .name = "dh",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+ .init_service = ip_vs_dh_init_svc,
+ .done_service = ip_vs_dh_done_svc,
+ .add_dest = ip_vs_dh_dest_changed,
+ .del_dest = ip_vs_dh_dest_changed,
+ .schedule = ip_vs_dh_schedule,
+};
+
+
+static int __init ip_vs_dh_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+static void __exit ip_vs_dh_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+ synchronize_rcu();
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
new file mode 100644
index 000000000..f53150d82
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_est.c: simple rate estimator for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
+ * Global data moved to netns, i.e. struct netns_ipvs
+ * Affected data: est_list and est_lock.
+ * estimation_timer() runs with a timer per netns.
+ * get_stats() does the per-cpu summing.
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+
+#include <net/ip_vs.h>
+
+/*
+  This code estimates the rate over a short interval (such as 8
+  seconds) for virtual services and real servers. To measure the rate
+  over a long interval, it is easy to implement a user-level daemon
+  that periodically reads the statistical counters and computes the
+  rate.
+
+  Currently, the measurement is driven by a slow timer handler.
+  Hopefully this measurement will not introduce too much load.
+
+  We measure the rate during the last 8 seconds, every 2 seconds:
+
+ avgrate = avgrate*(1-W) + rate*W
+
+ where W = 2^(-2)
+
+ NOTES.
+
+ * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
+
+ * Netlink users can see 64-bit values but sockopt users are restricted
+ to 32-bit values for conns, packets, bps, cps and pps.
+
+ * A lot of code is taken from net/core/gen_estimator.c
+ */
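+
+/*
+ * Editor's sketch of the fixed-point update performed in
+ * estimation_timer() below (an illustration, not extra logic).  With
+ * W = 2^-2 the smoothing is a right shift, and the 2-second sampling
+ * divisor is folded into the scaling shift (2^10 / 2 = 2^9 for cps/pps):
+ *
+ *	u64 rate = (cur_conns - last_conns) << 9;   // conns/s, scaled 2^10
+ *	avg += ((s64)rate - (s64)avg) >> 2;         // avg = avg*(1-W) + rate*W
+ */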
+
+
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
+ struct ip_vs_cpu_stats __percpu *stats)
+{
+ int i;
+ bool add = false;
+
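+	/* the first CPU visited seeds the sums, later CPUs accumulate */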
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+ unsigned int start;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
+ if (add) {
+ do {
+ start = u64_stats_fetch_begin(&s->syncp);
+ conns = u64_stats_read(&s->cnt.conns);
+ inpkts = u64_stats_read(&s->cnt.inpkts);
+ outpkts = u64_stats_read(&s->cnt.outpkts);
+ inbytes = u64_stats_read(&s->cnt.inbytes);
+ outbytes = u64_stats_read(&s->cnt.outbytes);
+ } while (u64_stats_fetch_retry(&s->syncp, start));
+ sum->conns += conns;
+ sum->inpkts += inpkts;
+ sum->outpkts += outpkts;
+ sum->inbytes += inbytes;
+ sum->outbytes += outbytes;
+ } else {
+ add = true;
+ do {
+ start = u64_stats_fetch_begin(&s->syncp);
+ sum->conns = u64_stats_read(&s->cnt.conns);
+ sum->inpkts = u64_stats_read(&s->cnt.inpkts);
+ sum->outpkts = u64_stats_read(&s->cnt.outpkts);
+ sum->inbytes = u64_stats_read(&s->cnt.inbytes);
+ sum->outbytes = u64_stats_read(&s->cnt.outbytes);
+ } while (u64_stats_fetch_retry(&s->syncp, start));
+ }
+ }
+}
+
+
+static void estimation_timer(struct timer_list *t)
+{
+ struct ip_vs_estimator *e;
+ struct ip_vs_stats *s;
+ u64 rate;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
+
+ if (!sysctl_run_estimation(ipvs))
+ goto skip;
+
+ spin_lock(&ipvs->est_lock);
+ list_for_each_entry(e, &ipvs->est_list, list) {
+ s = container_of(e, struct ip_vs_stats, est);
+
+ spin_lock(&s->lock);
+ ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
+
+		/* scaled by 2^10, but divided by the 2 second interval */
+ rate = (s->kstats.conns - e->last_conns) << 9;
+ e->last_conns = s->kstats.conns;
+ e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+ rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+ e->last_inpkts = s->kstats.inpkts;
+ e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+ rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+ e->last_outpkts = s->kstats.outpkts;
+ e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+		/* scaled by 2^5, but divided by the 2 second interval */
+ rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+ e->last_inbytes = s->kstats.inbytes;
+ e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+ rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+ e->last_outbytes = s->kstats.outbytes;
+ e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
+ spin_unlock(&s->lock);
+ }
+ spin_unlock(&ipvs->est_lock);
+
+skip:
+ mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
+}
+
+void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
+{
+ struct ip_vs_estimator *est = &stats->est;
+
+ INIT_LIST_HEAD(&est->list);
+
+ spin_lock_bh(&ipvs->est_lock);
+ list_add(&est->list, &ipvs->est_list);
+ spin_unlock_bh(&ipvs->est_lock);
+}
+
+void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
+{
+ struct ip_vs_estimator *est = &stats->est;
+
+ spin_lock_bh(&ipvs->est_lock);
+ list_del(&est->list);
+ spin_unlock_bh(&ipvs->est_lock);
+}
+
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+ struct ip_vs_estimator *est = &stats->est;
+ struct ip_vs_kstats *k = &stats->kstats;
+
+	/* reset counters, caller must hold the stats->lock */
+ est->last_inbytes = k->inbytes;
+ est->last_outbytes = k->outbytes;
+ est->last_conns = k->conns;
+ est->last_inpkts = k->inpkts;
+ est->last_outpkts = k->outpkts;
+ est->cps = 0;
+ est->inpps = 0;
+ est->outpps = 0;
+ est->inbps = 0;
+ est->outbps = 0;
+}
+
+/* Get decoded rates */
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
+{
+ struct ip_vs_estimator *e = &stats->est;
+
+ dst->cps = (e->cps + 0x1FF) >> 10;
+ dst->inpps = (e->inpps + 0x1FF) >> 10;
+ dst->outpps = (e->outpps + 0x1FF) >> 10;
+ dst->inbps = (e->inbps + 0xF) >> 5;
+ dst->outbps = (e->outbps + 0xF) >> 5;
+}
+
+int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
+{
+ INIT_LIST_HEAD(&ipvs->est_list);
+ spin_lock_init(&ipvs->est_lock);
+ timer_setup(&ipvs->est_timer, estimation_timer, 0);
+ mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
+ return 0;
+}
+
+void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
+{
+ del_timer_sync(&ipvs->est_timer);
+}
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
new file mode 100644
index 000000000..b846cc385
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Weighted Fail Over module
+ *
+ * Authors: Kenny Mathis <kmathis@chokepoint.net>
+ *
+ * Changes:
+ * Kenny Mathis : added initial functionality based on weight
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted Fail Over Module */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *hweight = NULL;
+ int hw = 0; /* Track highest weight */
+
+ IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+ /* Basic failover functionality
+	 * Find the real server with the highest weight and send it traffic
+ */
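+	/* e.g. with weights 100/50/50, all traffic goes to the weight-100
+	 * server until it becomes overloaded or is removed
+	 */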
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > hw) {
+ hweight = dest;
+ hw = atomic_read(&dest->weight);
+ }
+ }
+
+ if (hweight) {
+ IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+ IP_VS_DBG_ADDR(hweight->af, &hweight->addr),
+ ntohs(hweight->port),
+ atomic_read(&hweight->activeconns),
+ atomic_read(&hweight->weight));
+ return hweight;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+ .name = "fo",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+ .schedule = ip_vs_fo_schedule,
+};
+
+static int __init ip_vs_fo_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+}
+
+static void __exit ip_vs_fo_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
new file mode 100644
index 000000000..ef1f45e43
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_ftp.c: IPVS ftp application module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that the ip_vs_ftp module handles the reverse direction relative to
+ * ip_masq_ftp.
+ *
+ * IP_MASQ_FTP ftp masquerading module
+ *
+ * Version: @(#)ip_masq_ftp.c 0.04 02/05/96
+ *
+ * Author: Wouter Gadeyne
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/gfp.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/unaligned.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING_PASV "227 "
+#define CLIENT_STRING_PORT "PORT"
+#define SERVER_STRING_EPSV "229 "
+#define CLIENT_STRING_EPRT "EPRT"
+
+enum {
+ IP_VS_FTP_ACTIVE = 0,
+ IP_VS_FTP_PORT = 0,
+ IP_VS_FTP_PASV,
+ IP_VS_FTP_EPRT,
+ IP_VS_FTP_EPSV,
+};
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by the helper.
+ * The first port is set to the default port.
+ */
+static unsigned int ports_count = 1;
+static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, ushort, &ports_count, 0444);
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
+
+
+static char *ip_vs_ftp_data_ptr(struct sk_buff *skb, struct ip_vs_iphdr *ipvsh)
+{
+ struct tcphdr *th = (struct tcphdr *)((char *)skb->data + ipvsh->len);
+
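+	/* th->doff counts 32-bit words, so doff << 2 is the TCP header
+	 * length in bytes; anything smaller than a bare header is bogus
+	 */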
+ if ((th->doff << 2) < sizeof(struct tcphdr))
+ return NULL;
+
+ return (char *)th + (th->doff << 2);
+}
+
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+ /* We use connection tracking for the command connection */
+ cp->flags |= IP_VS_CONN_F_NFCT;
+ return 0;
+}
+
+
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+ return 0;
+}
+
+
+/* Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", starting
+ * with the "pattern". <addr,port> is returned in network order.
+ * Parse the extended format depending on ext; in that case addr can be
+ * pre-set by the caller.
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+ const char *pattern, size_t plen,
+ char skip, bool ext, int mode,
+ union nf_inet_addr *addr, __be16 *port,
+ __u16 af, char **start, char **end)
+{
+ char *s, c;
+ unsigned char p[6];
+ char edelim;
+ __u16 hport;
+ int i = 0;
+
+ if (data_limit - data < plen) {
+		/* check if there is a partial match */
+ if (strncasecmp(data, pattern, data_limit - data) == 0)
+ return -1;
+ else
+ return 0;
+ }
+
+ if (strncasecmp(data, pattern, plen) != 0) {
+ return 0;
+ }
+ s = data + plen;
+ if (skip) {
+ bool found = false;
+
+ for (;; s++) {
+ if (s == data_limit)
+ return -1;
+ if (!found) {
+ /* "(" is optional for non-extended format,
+				 * so catch the start of an IPv4 address
+ */
+ if (!ext && isdigit(*s))
+ break;
+ if (*s == skip)
+ found = true;
+ } else if (*s != skip) {
+ break;
+ }
+ }
+ }
+ /* Old IPv4-only format? */
+ if (!ext) {
+ p[0] = 0;
+ for (data = s; ; data++) {
+ if (data == data_limit)
+ return -1;
+ c = *data;
+ if (isdigit(c)) {
+ p[i] = p[i]*10 + c - '0';
+ } else if (c == ',' && i < 5) {
+ i++;
+ p[i] = 0;
+ } else {
+ /* unexpected character or terminator */
+ break;
+ }
+ }
+
+ if (i != 5)
+ return -1;
+
+ *start = s;
+ *end = data;
+ addr->ip = get_unaligned((__be32 *) p);
+ *port = get_unaligned((__be16 *) (p + 4));
+ return 1;
+ }
+ if (s == data_limit)
+ return -1;
+ *start = s;
+ edelim = *s++;
+ if (edelim < 33 || edelim > 126)
+ return -1;
+ if (s == data_limit)
+ return -1;
+ if (*s == edelim) {
+ /* Address family is usually missing for EPSV response */
+ if (mode != IP_VS_FTP_EPSV)
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ /* Then address should be missing too */
+ if (*s != edelim)
+ return -1;
+ /* Caller can pre-set addr, if needed */
+ s++;
+ } else {
+ const char *ep;
+
+		/* We only allow an address from the same family */
+ if (af == AF_INET6 && *s != '2')
+ return -1;
+ if (af == AF_INET && *s != '1')
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ if (*s != edelim)
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ if (af == AF_INET6) {
+ if (in6_pton(s, data_limit - s, (u8 *)addr, edelim,
+ &ep) <= 0)
+ return -1;
+ } else {
+ if (in4_pton(s, data_limit - s, (u8 *)addr, edelim,
+ &ep) <= 0)
+ return -1;
+ }
+ s = (char *) ep;
+ if (s == data_limit)
+ return -1;
+ if (*s != edelim)
+ return -1;
+ s++;
+ }
+	for (hport = 0; ; s++) {
+ if (s == data_limit)
+ return -1;
+ if (!isdigit(*s))
+ break;
+ hport = hport * 10 + *s - '0';
+ }
+ if (s == data_limit || !hport || *s != edelim)
+ return -1;
+ s++;
+ *end = s;
+ *port = htons(hport);
+ return 1;
+}
+
+/* Look at outgoing ftp packets to catch the response to a PASV/EPSV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port. Mark the current connection entry as a control channel
+ * of the new entry. All this work is done so that the data connection
+ * can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ * "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ * The extended format of the EPSV response usually provides only the port:
+ * "229 Entering Extended Passive Mode (|||ppp|)"
+ */
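+/*
+ * Worked example (editor's note, not from the original file): in
+ * "227 Entering Passive Mode (192,168,0,10,78,236)" the first four
+ * octets form the address 192.168.0.10 and the last two encode the
+ * port in network byte order: 78 * 256 + 236 = 20204.
+ */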
+static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
+ struct sk_buff *skb, int *diff,
+ struct ip_vs_iphdr *ipvsh)
+{
+ char *data, *data_limit;
+ char *start, *end;
+ union nf_inet_addr from;
+ __be16 port;
+ struct ip_vs_conn *n_cp;
+ char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+ unsigned int buf_len;
+ int ret = 0;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ *diff = 0;
+
+ /* Only useful for established sessions */
+ if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+ return 1;
+
+ /* Linear packets are much easier to deal with. */
+ if (skb_ensure_writable(skb, skb->len))
+ return 0;
+
+ if (cp->app_data == (void *) IP_VS_FTP_PASV) {
+ data = ip_vs_ftp_data_ptr(skb, ipvsh);
+ data_limit = skb_tail_pointer(skb);
+
+ if (!data || data >= data_limit)
+ return 1;
+
+ if (ip_vs_ftp_get_addrport(data, data_limit,
+ SERVER_STRING_PASV,
+ sizeof(SERVER_STRING_PASV)-1,
+ '(', false, IP_VS_FTP_PASV,
+ &from, &port, cp->af,
+ &start, &end) != 1)
+ return 1;
+
+ IP_VS_DBG(7, "PASV response (%pI4:%u) -> %pI4:%u detected\n",
+ &from.ip, ntohs(port), &cp->caddr.ip, 0);
+ } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) {
+ data = ip_vs_ftp_data_ptr(skb, ipvsh);
+ data_limit = skb_tail_pointer(skb);
+
+ if (!data || data >= data_limit)
+ return 1;
+
+		/* Usually the data address is not specified, but
+		 * we support a different address, so pre-set it.
+ */
+ from = cp->daddr;
+ if (ip_vs_ftp_get_addrport(data, data_limit,
+ SERVER_STRING_EPSV,
+ sizeof(SERVER_STRING_EPSV)-1,
+ '(', true, IP_VS_FTP_EPSV,
+ &from, &port, cp->af,
+ &start, &end) != 1)
+ return 1;
+
+ IP_VS_DBG_BUF(7, "EPSV response (%s:%u) -> %s:%u detected\n",
+ IP_VS_DBG_ADDR(cp->af, &from), ntohs(port),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), 0);
+ } else {
+ return 1;
+ }
+
+ /* Now update or create a connection entry for it */
+ {
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(cp->ipvs, cp->af,
+ ipvsh->protocol, &from, port,
+ &cp->caddr, 0, &p);
+ n_cp = ip_vs_conn_out_get(&p);
+ }
+ if (!n_cp) {
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(cp->ipvs,
+ cp->af, ipvsh->protocol, &cp->caddr,
+ 0, &cp->vaddr, port, &p);
+ n_cp = ip_vs_conn_new(&p, cp->af, &from, port,
+ IP_VS_CONN_F_NO_CPORT |
+ IP_VS_CONN_F_NFCT,
+ cp->dest, skb->mark);
+ if (!n_cp)
+ return 0;
+
+ /* add its controller */
+ ip_vs_control_add(n_cp, cp);
+ }
+
+ /* Replace the old passive address with the new one */
+ if (cp->app_data == (void *) IP_VS_FTP_PASV) {
+ from.ip = n_cp->vaddr.ip;
+ port = n_cp->vport;
+ snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
+ ((unsigned char *)&from.ip)[0],
+ ((unsigned char *)&from.ip)[1],
+ ((unsigned char *)&from.ip)[2],
+ ((unsigned char *)&from.ip)[3],
+ ntohs(port) >> 8,
+ ntohs(port) & 0xFF);
+ } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) {
+ from = n_cp->vaddr;
+ port = n_cp->vport;
+ /* Only port, client will use VIP for the data connection */
+ snprintf(buf, sizeof(buf), "|||%u|",
+ ntohs(port));
+ } else {
+ *buf = 0;
+ }
+ buf_len = strlen(buf);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ bool mangled;
+
+ /* If mangling fails this function will return 0
+ * which will cause the packet to be dropped.
+ * Mangling can only fail under memory pressure,
+ * hopefully it will succeed on the retransmitted
+ * packet.
+ */
+ mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ ipvsh->len,
+ start - data,
+ end - start,
+ buf, buf_len);
+ if (mangled) {
+ ip_vs_nfct_expect_related(skb, ct, n_cp,
+ ipvsh->protocol, 0, 0);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* csum is updated */
+ ret = 1;
+ }
+ }
+
+ /* Not setting 'diff' is intentional, otherwise the sequence
+ * would be adjusted twice.
+ */
+
+ cp->app_data = (void *) IP_VS_FTP_ACTIVE;
+ ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_conn_put(n_cp);
+ return ret;
+}
+
+
+/* Look at incoming ftp packets to catch the PASV/PORT/EPRT/EPSV command
+ * (outside-to-inside).
+ *
+ * The incoming packet carrying the PORT command should be something like
+ * "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ * Extended format:
+ * "EPSV\r\n" when client requests server address from same family
+ * "EPSV 1\r\n" when client requests IPv4 server address
+ * "EPSV 2\r\n" when client requests IPv6 server address
+ * "EPSV ALL\r\n" - not supported
+ * EPRT with specified delimiter (ASCII 33..126), "|" by default:
+ * "EPRT |1|IPv4ADDR|PORT|\r\n" when client provides IPv4 addrport
+ * "EPRT |2|IPv6ADDR|PORT|\r\n" when client provides IPv6 addrport
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
+ struct sk_buff *skb, int *diff,
+ struct ip_vs_iphdr *ipvsh)
+{
+ char *data, *data_start, *data_limit;
+ char *start, *end;
+ union nf_inet_addr to;
+ __be16 port;
+ struct ip_vs_conn *n_cp;
+
+ /* no diff required for incoming packets */
+ *diff = 0;
+
+ /* Only useful for established sessions */
+ if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+ return 1;
+
+ /* Linear packets are much easier to deal with. */
+ if (skb_ensure_writable(skb, skb->len))
+ return 0;
+
+ data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
+ data_limit = skb_tail_pointer(skb);
+ if (!data || data >= data_limit)
+ return 1;
+
+ while (data <= data_limit - 6) {
+ if (cp->af == AF_INET &&
+ strncasecmp(data, "PASV\r\n", 6) == 0) {
+ /* Passive mode on */
+ IP_VS_DBG(7, "got PASV at %td of %td\n",
+ data - data_start,
+ data_limit - data_start);
+ cp->app_data = (void *) IP_VS_FTP_PASV;
+ return 1;
+ }
+
+ /* EPSV or EPSV<space><net-prt> */
+ if (strncasecmp(data, "EPSV", 4) == 0 &&
+ (data[4] == ' ' || data[4] == '\r')) {
+ if (data[4] == ' ') {
+ char proto = data[5];
+
+ if (data > data_limit - 7 || data[6] != '\r')
+ return 1;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && proto == '2') {
+ } else
+#endif
+ if (cp->af == AF_INET && proto == '1') {
+ } else {
+ return 1;
+ }
+ }
+ /* Extended Passive mode on */
+ IP_VS_DBG(7, "got EPSV at %td of %td\n",
+ data - data_start,
+ data_limit - data_start);
+ cp->app_data = (void *) IP_VS_FTP_EPSV;
+ return 1;
+ }
+
+ data++;
+ }
+
+ /*
+	 * To support a virtual FTP server, the scenario is as follows:
+ * FTP client ----> Load Balancer ----> FTP server
+ * First detect the port number in the application data,
+	 * then create a new connection entry for the forthcoming data
+ * connection.
+ */
+ if (cp->af == AF_INET &&
+ ip_vs_ftp_get_addrport(data_start, data_limit,
+ CLIENT_STRING_PORT,
+ sizeof(CLIENT_STRING_PORT)-1,
+ ' ', false, IP_VS_FTP_PORT,
+ &to, &port, cp->af,
+ &start, &end) == 1) {
+
+ IP_VS_DBG(7, "PORT %pI4:%u detected\n", &to.ip, ntohs(port));
+
+ /* Now update or create a connection entry for it */
+ IP_VS_DBG(7, "protocol %s %pI4:%u %pI4:%u\n",
+ ip_vs_proto_name(ipvsh->protocol),
+ &to.ip, ntohs(port), &cp->vaddr.ip,
+ ntohs(cp->vport)-1);
+ } else if (ip_vs_ftp_get_addrport(data_start, data_limit,
+ CLIENT_STRING_EPRT,
+ sizeof(CLIENT_STRING_EPRT)-1,
+ ' ', true, IP_VS_FTP_EPRT,
+ &to, &port, cp->af,
+ &start, &end) == 1) {
+
+ IP_VS_DBG_BUF(7, "EPRT %s:%u detected\n",
+ IP_VS_DBG_ADDR(cp->af, &to), ntohs(port));
+
+ /* Now update or create a connection entry for it */
+ IP_VS_DBG_BUF(7, "protocol %s %s:%u %s:%u\n",
+ ip_vs_proto_name(ipvsh->protocol),
+ IP_VS_DBG_ADDR(cp->af, &to), ntohs(port),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport)-1);
+ } else {
+ return 1;
+ }
+
+ /* Passive mode off */
+ cp->app_data = (void *) IP_VS_FTP_ACTIVE;
+
+ {
+ struct ip_vs_conn_param p;
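+
+		/* The ftp data port is the port just below the control
+		 * port (20 when the control connection is on 21), hence
+		 * the -1 on vport and dport below.
+		 */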
+ ip_vs_conn_fill_param(cp->ipvs, cp->af,
+ ipvsh->protocol, &to, port, &cp->vaddr,
+ htons(ntohs(cp->vport)-1), &p);
+ n_cp = ip_vs_conn_in_get(&p);
+ if (!n_cp) {
+ n_cp = ip_vs_conn_new(&p, cp->af, &cp->daddr,
+ htons(ntohs(cp->dport)-1),
+ IP_VS_CONN_F_NFCT, cp->dest,
+ skb->mark);
+ if (!n_cp)
+ return 0;
+
+ /* add its controller */
+ ip_vs_control_add(n_cp, cp);
+ }
+ }
+
+ /*
+ * Move tunnel to listen state
+ */
+ ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_conn_put(n_cp);
+
+ return 1;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+ .name = "ftp",
+ .type = IP_VS_APP_TYPE_FTP,
+ .protocol = IPPROTO_TCP,
+ .module = THIS_MODULE,
+ .incs_list = LIST_HEAD_INIT(ip_vs_ftp.incs_list),
+ .init_conn = ip_vs_ftp_init_conn,
+ .done_conn = ip_vs_ftp_done_conn,
+ .bind_conn = NULL,
+ .unbind_conn = NULL,
+ .pkt_out = ip_vs_ftp_out,
+ .pkt_in = ip_vs_ftp_in,
+};
+
+/*
+ * per netns ip_vs_ftp initialization
+ */
+static int __net_init __ip_vs_ftp_init(struct net *net)
+{
+ int i, ret;
+ struct ip_vs_app *app;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return -ENOENT;
+
+ app = register_ip_vs_app(ipvs, &ip_vs_ftp);
+ if (IS_ERR(app))
+ return PTR_ERR(app);
+
+ for (i = 0; i < ports_count; i++) {
+ if (!ports[i])
+ continue;
+ ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
+ if (ret)
+ goto err_unreg;
+ }
+ return 0;
+
+err_unreg:
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
+ return ret;
+}
+/*
+ * netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return;
+
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+ .init = __ip_vs_ftp_init,
+ .exit = __ip_vs_ftp_exit,
+};
+
+static int __init ip_vs_ftp_init(void)
+{
+ /* rcu_barrier() is called by netns on error */
+ return register_pernet_subsys(&ip_vs_ftp_ops);
+}
+
+/*
+ * ip_vs_ftp finish.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+ unregister_pernet_subsys(&ip_vs_ftp_ops);
+ /* rcu_barrier() is called by netns */
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
new file mode 100644
index 000000000..7ac7473e3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Locality-Based Least-Connection scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * Changes:
+ * Martin Hamilton : fixed the terrible locking bugs
+ * *lock(tbl->lock) ==> *lock(&tbl->lock)
+ * Wensong Zhang : fixed the uninitialized tbl->lock bug
+ * Wensong Zhang : added doing full expiration check to
+ * collect stale entries of 24+ hours when
+ * no partial expire check in a half hour
+ * Julian Anastasov : replaced del_timer call with del_timer_sync
+ * to avoid the possible race between timer
+ * handler and del_timer thread in SMP
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ * if cachenode[dest_ip] is null then
+ * n, cachenode[dest_ip] <- {weighted least-conn node};
+ * else
+ * n <- cachenode[dest_ip];
+ * if (n is dead) OR
+ * (n.conns>n.weight AND
+ * there is a node m with m.conns<m.weight/2) then
+ * n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ * return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking about WCCP with me and pushing
+ * me to write this module.
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+#include <linux/hash.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * It is for garbage collection of stale IPVS lblc entries,
+ * when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL (60*HZ)
+#define ENTRY_TIMEOUT (6*60*HZ)
+
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
+/*
+ * It is for full expiration check.
+ * When there is no partial expiration check (garbage collection)
+ * in a half hour, do a full expiration check to collect stale
+ * entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION 30
+
+
+/*
+ * for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS 10
+#endif
+#define IP_VS_LBLC_TAB_BITS CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ * IPVS lblc entry represents an association between destination
+ * IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+ struct hlist_node list;
+ int af; /* address family */
+ union nf_inet_addr addr; /* destination IP address */
+ struct ip_vs_dest *dest; /* real server (cache) */
+ unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
+};
+
+
+/*
+ * IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+ struct rcu_head rcu_head;
+ struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
+ atomic_t entries; /* number of entries */
+ int max_size; /* maximum size of entries */
+ int rover; /* rover for expire check */
+ int counter; /* counter for no expire */
+ bool dead;
+};
+
+
+/*
+ * IPVS LBLC sysctl table
+ */
+#ifdef CONFIG_SYSCTL
+static struct ctl_table vs_vars_table[] = {
+ {
+ .procname = "lblc_expiration",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif
+
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_lblc_entry *en = container_of(head,
+ struct ip_vs_lblc_entry,
+ rcu_head);
+
+ ip_vs_dest_put_and_free(en->dest);
+ kfree(en);
+}
+
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+ hlist_del_rcu(&en->list);
+ call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
+
+/*
+ * Returns hash value for IPVS LBLC entry
+ */
+static inline unsigned int
+ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
+{
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+ return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
+}
+
+
+/*
+ *	Hash an entry into the ip_vs_lblc_table.
+ */
+static void
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+ unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
+
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
+ atomic_inc(&tbl->entries);
+}
+
+
+/* Get ip_vs_lblc_entry associated with supplied parameters. */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
+ const union nf_inet_addr *addr)
+{
+ unsigned int hash = ip_vs_lblc_hashkey(af, addr);
+ struct ip_vs_lblc_entry *en;
+
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
+ if (ip_vs_addr_equal(af, &en->addr, addr))
+ return en;
+
+ return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under spin lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
+ u16 af, struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblc_entry *en;
+
+ en = ip_vs_lblc_get(af, tbl, daddr);
+ if (en) {
+ if (en->dest == dest)
+ return en;
+ ip_vs_lblc_del(en);
+ }
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en)
+ return NULL;
+
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
+ en->lastuse = jiffies;
+
+ ip_vs_dest_hold(dest);
+ en->dest = dest;
+
+ ip_vs_lblc_hash(tbl, en);
+
+ return en;
+}
+
+
+/*
+ * Flush all the entries of the specified table.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
+ int i;
+
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = true;
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
+ ip_vs_lblc_del(en);
+ atomic_dec(&tbl->entries);
+ }
+ }
+ spin_unlock_bh(&svc->sched_lock);
+}
+
+static int sysctl_lblc_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ return svc->ipvs->sysctl_lblc_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
+
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
+ unsigned long now = jiffies;
+ int i, j;
+
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
+ if (time_before(now,
+ en->lastuse +
+ sysctl_lblc_expiration(svc)))
+ continue;
+
+ ip_vs_lblc_del(en);
+ atomic_dec(&tbl->entries);
+ }
+ spin_unlock(&svc->sched_lock);
+ }
+ tbl->rover = j;
+}
+
+
+/*
+ *	Periodic timer handler for the IPVS lblc table.
+ *	It is used to collect stale entries when the number of entries
+ *	exceeds the maximum size of the table.
+ *
+ *	Fixme: we probably need a more complicated algorithm to collect
+ *		entries that have not been used for a long time even
+ *		if the number of entries doesn't exceed the maximum size
+ *		of the table.
+ *	The full expiration check serves this purpose for now.
+ */
+static void ip_vs_lblc_check_expire(struct timer_list *t)
+{
+ struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
+ unsigned long now = jiffies;
+ int goal;
+ int i, j;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
+
+ if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+ /* do full expiration check */
+ ip_vs_lblc_full_check(svc);
+ tbl->counter = 1;
+ goto out;
+ }
+
+ if (atomic_read(&tbl->entries) <= tbl->max_size) {
+ tbl->counter++;
+ goto out;
+ }
+
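+	/* Try to free 4/3 of the excess over max_size, but never more
+	 * than half of the table in one pass.
+	 */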
+ goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+ if (goal > tbl->max_size/2)
+ goal = tbl->max_size/2;
+
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
+ if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
+ continue;
+
+ ip_vs_lblc_del(en);
+ atomic_dec(&tbl->entries);
+ goal--;
+ }
+ spin_unlock(&svc->sched_lock);
+ if (goal <= 0)
+ break;
+ }
+ tbl->rover = j;
+
+ out:
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+}
+
+
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+ int i;
+ struct ip_vs_lblc_table *tbl;
+
+ /*
+ * Allocate the ip_vs_lblc_table for this service
+ */
+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+ if (tbl == NULL)
+ return -ENOMEM;
+
+ svc->sched_data = tbl;
+ IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) allocated for "
+ "current service\n", sizeof(*tbl));
+
+ /*
+ * Initialize the hash buckets
+ */
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
+ }
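+	/* Allow an average of 16 entries per bucket before the periodic
+	 * timer starts garbage-collecting (see ip_vs_lblc_check_expire()).
+	 */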
+ tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+ tbl->rover = 0;
+ tbl->counter = 1;
+ tbl->dead = false;
+ tbl->svc = svc;
+ atomic_set(&tbl->entries, 0);
+
+ /*
+ * Hook periodic timer for garbage collection
+ */
+ timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+ return 0;
+}
+
+
+static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+
+ /* remove periodic timer */
+ del_timer_sync(&tbl->periodic_timer);
+
+ /* got to clean up table entries here */
+ ip_vs_lblc_flush(svc);
+
+ /* release the table itself */
+ kfree_rcu(tbl, rcu_head);
+ IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) released\n",
+ sizeof(*tbl));
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblc_schedule(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ /*
+ * We use the following formula to estimate the load:
+ * (dest overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
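+	 * (e.g. h1=10,w1=2 vs h2=4,w2=1: 10*1 > 4*2 matches 10/2 > 4/1,
+	 * so the second server has the smaller per-weight load and wins.)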
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connection.
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+ if (atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = ip_vs_dest_conn_overhead(least);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /*
+ * Find the destination with the least load.
+ */
+ nextstage:
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ doh = ip_vs_dest_conn_overhead(dest);
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "LBLC: server %s:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+
+/*
+ * If this destination server is overloaded and there is a less loaded
+ * server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+ if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+ struct ip_vs_dest *d;
+
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
+ if (atomic_read(&d->activeconns)*2
+ < atomic_read(&d->weight)) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_dest *dest = NULL;
+ struct ip_vs_lblc_entry *en;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ /* First look in our cache */
+ en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr);
+ if (en) {
+ /* We only hold a read lock, but this is atomic */
+ en->lastuse = jiffies;
+
+ /*
+ * If the destination is not available, i.e. it's in the trash,
+ * we must ignore it, as it may be removed from under our feet,
+ * if someone drops our reference count. Our caller only makes
+ * sure that destinations, that are not in the trash, are not
+ * moved to the trash, while we are scheduling. But anyone can
+ * free up entries from the trash at any time.
+ */
+
+ dest = en->dest;
+ if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
+ atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+ goto out;
+ }
+
+ /* No cache entry or it is invalid, time to schedule */
+ dest = __ip_vs_lblc_schedule(svc);
+ if (!dest) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
+ spin_unlock_bh(&svc->sched_lock);
+
+out:
+ IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
+ IP_VS_DBG_ADDR(svc->af, &iph->daddr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
+
+ return dest;
+}
+
+
+/*
+ * IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
+ .name = "lblc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+ .init_service = ip_vs_lblc_init_svc,
+ .done_service = ip_vs_lblc_done_svc,
+ .schedule = ip_vs_lblc_schedule,
+};
+
+/*
+ * per netns init.
+ */
+#ifdef CONFIG_SYSCTL
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return -ENOENT;
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblc_ctl_table == NULL)
+ return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ ipvs->lblc_ctl_table[0].procname = NULL;
+
+ } else
+ ipvs->lblc_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
+ ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+ ipvs->lblc_ctl_header =
+ register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table);
+ if (!ipvs->lblc_ctl_header) {
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+}
+
+#else
+
+static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
+
+#endif
+
+static struct pernet_operations ip_vs_lblc_ops = {
+ .init = __ip_vs_lblc_init,
+ .exit = __ip_vs_lblc_exit,
+};
+
+static int __init ip_vs_lblc_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip_vs_lblc_ops);
+ if (ret)
+ return ret;
+
+ ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+ if (ret)
+ unregister_pernet_subsys(&ip_vs_lblc_ops);
+ return ret;
+}
+
+static void __exit ip_vs_lblc_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+ unregister_pernet_subsys(&ip_vs_lblc_ops);
+ rcu_barrier();
+}
+
+
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
new file mode 100644
index 000000000..77c323c36
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Locality-Based Least-Connection with Replication scheduler
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * Changes:
+ * Julian Anastasov : Added the missing (dest->weight>0)
+ * condition in the ip_vs_dest_set_max.
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ * if serverSet[dest_ip] is null then
+ * n, serverSet[dest_ip] <- {weighted least-conn node};
+ * else
+ * n <- {least-conn (alive) node in serverSet[dest_ip]};
+ * if (n is null) OR
+ * (n.conns>n.weight AND
+ * there is a node m with m.conns<m.weight/2) then
+ * n <- {weighted least-conn node};
+ * add n to serverSet[dest_ip];
+ * if |serverSet[dest_ip]| > 1 AND
+ * now - serverSet[dest_ip].lastMod > T then
+ * m <- {most conn node in serverSet[dest_ip]};
+ * remove m from serverSet[dest_ip];
+ * if serverSet[dest_ip] changed then
+ * serverSet[dest_ip].lastMod <- now;
+ *
+ * return n;
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * It is for garbage collection of stale IPVS lblcr entries,
+ * when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL (60*HZ)
+#define ENTRY_TIMEOUT (6*60*HZ)
+
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
+/*
+ * It is for full expiration check.
+ * When there is no partial expiration check (garbage collection)
+ * in a half hour, do a full expiration check to collect stale
+ * entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION 30
+
+/*
+ * for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS 10
+#endif
+#define IP_VS_LBLCR_TAB_BITS CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ * IPVS destination set structure and operations
+ */
+struct ip_vs_dest_set_elem {
+ struct list_head list; /* list link */
+ struct ip_vs_dest *dest; /* destination server */
+ struct rcu_head rcu_head;
+};
+
+struct ip_vs_dest_set {
+ atomic_t size; /* set size */
+ unsigned long lastmod; /* last modified time */
+ struct list_head list; /* destination list */
+};
+
+
+static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
+ struct ip_vs_dest *dest, bool check)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ if (check) {
+ list_for_each_entry(e, &set->list, list) {
+ if (e->dest == dest)
+ return;
+ }
+ }
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (e == NULL)
+ return;
+
+ ip_vs_dest_hold(dest);
+ e->dest = dest;
+
+ list_add_rcu(&e->list, &set->list);
+ atomic_inc(&set->size);
+
+ set->lastmod = jiffies;
+}
+
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+ ip_vs_dest_put_and_free(e->dest);
+ kfree(e);
+}
+
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ list_for_each_entry(e, &set->list, list) {
+ if (e->dest == dest) {
+ /* HIT */
+ atomic_dec(&set->size);
+ set->lastmod = jiffies;
+ list_del_rcu(&e->list);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
+ break;
+ }
+ }
+}
+
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+ struct ip_vs_dest_set_elem *e, *ep;
+
+ list_for_each_entry_safe(e, ep, &set->list, list) {
+ list_del_rcu(&e->list);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
+ }
+}
+
+/* get weighted least-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+ struct ip_vs_dest_set_elem *e;
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+	/* select the first destination server whose weight > 0 */
+ list_for_each_entry_rcu(e, &set->list, list) {
+ least = e->dest;
+ if (least->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ if ((atomic_read(&least->weight) > 0)
+ && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+ loh = ip_vs_dest_conn_overhead(least);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /* find the destination with the weighted least load */
+ nextstage:
+ list_for_each_entry_continue_rcu(e, &set->list, list) {
+ dest = e->dest;
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ doh = ip_vs_dest_conn_overhead(dest);
+ if (((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))
+ && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "%s(): server %s:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ __func__,
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+ return least;
+}
+
+
+/* get weighted most-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+ struct ip_vs_dest_set_elem *e;
+ struct ip_vs_dest *dest, *most;
+ int moh, doh;
+
+ if (set == NULL)
+ return NULL;
+
+	/* select the first destination server whose weight > 0 */
+ list_for_each_entry(e, &set->list, list) {
+ most = e->dest;
+ if (atomic_read(&most->weight) > 0) {
+ moh = ip_vs_dest_conn_overhead(most);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /* find the destination with the weighted most load */
+ nextstage:
+ list_for_each_entry_continue(e, &set->list, list) {
+ dest = e->dest;
+ doh = ip_vs_dest_conn_overhead(dest);
+ /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+ if (((__s64)moh * atomic_read(&dest->weight) <
+ (__s64)doh * atomic_read(&most->weight))
+ && (atomic_read(&dest->weight) > 0)) {
+ most = dest;
+ moh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "%s(): server %s:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ __func__,
+ IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
+ atomic_read(&most->activeconns),
+ refcount_read(&most->refcnt),
+ atomic_read(&most->weight), moh);
+ return most;
+}
+
+
+/*
+ * IPVS lblcr entry represents an association between destination
+ * IP address and its destination server set
+ */
+struct ip_vs_lblcr_entry {
+ struct hlist_node list;
+ int af; /* address family */
+ union nf_inet_addr addr; /* destination IP address */
+ struct ip_vs_dest_set set; /* destination server set */
+ unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
+};
+
+
+/*
+ * IPVS lblcr hash table
+ */
+struct ip_vs_lblcr_table {
+ struct rcu_head rcu_head;
+ struct hlist_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
+ atomic_t entries; /* number of entries */
+ int max_size; /* maximum size of entries */
+ struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
+ int rover; /* rover for expire check */
+ int counter; /* counter for no expire */
+ bool dead;
+};
+
+
+#ifdef CONFIG_SYSCTL
+/*
+ * IPVS LBLCR sysctl table
+ */
+
+static struct ctl_table vs_vars_table[] = {
+ {
+ .procname = "lblcr_expiration",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif
+
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+ hlist_del_rcu(&en->list);
+ ip_vs_dest_set_eraseall(&en->set);
+ kfree_rcu(en, rcu_head);
+}
+
+
+/*
+ * Returns hash value for IPVS LBLCR entry
+ */
+static inline unsigned int
+ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
+{
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+ return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
+}
+
+
+/*
+ *	Hash an entry into the ip_vs_lblcr_table.
+ */
+static void
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+ unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
+
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
+ atomic_inc(&tbl->entries);
+}
+
+
+/* Get ip_vs_lblcr_entry associated with supplied parameters. */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
+ const union nf_inet_addr *addr)
+{
+ unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
+ struct ip_vs_lblcr_entry *en;
+
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
+ if (ip_vs_addr_equal(af, &en->addr, addr))
+ return en;
+
+ return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under spin lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
+ u16 af, struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblcr_entry *en;
+
+ en = ip_vs_lblcr_get(af, tbl, daddr);
+ if (!en) {
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en)
+ return NULL;
+
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
+ en->lastuse = jiffies;
+
+ /* initialize its dest set */
+ atomic_set(&(en->set.size), 0);
+ INIT_LIST_HEAD(&en->set.list);
+
+ ip_vs_dest_set_insert(&en->set, dest, false);
+
+ ip_vs_lblcr_hash(tbl, en);
+ return en;
+ }
+
+ ip_vs_dest_set_insert(&en->set, dest, true);
+
+ return en;
+}
+
+
+/*
+ * Flush all the entries of the specified table.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ int i;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
+
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = true;
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
+ ip_vs_lblcr_free(en);
+ }
+ }
+ spin_unlock_bh(&svc->sched_lock);
+}
+
+static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ return svc->ipvs->sysctl_lblcr_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
+
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ unsigned long now = jiffies;
+ int i, j;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
+
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
+ if (time_after(en->lastuse +
+ sysctl_lblcr_expiration(svc), now))
+ continue;
+
+ ip_vs_lblcr_free(en);
+ atomic_dec(&tbl->entries);
+ }
+ spin_unlock(&svc->sched_lock);
+ }
+ tbl->rover = j;
+}
+
+
+/*
+ *	Periodic timer handler for the IPVS lblcr table.
+ *	It is used to collect stale entries when the number of entries
+ *	exceeds the maximum size of the table.
+ *
+ *	Fixme: we probably need a more complicated algorithm to collect
+ *		entries that have not been used for a long time even
+ *		if the number of entries doesn't exceed the maximum size
+ *		of the table.
+ *	The full expiration check serves this purpose for now.
+ */
+static void ip_vs_lblcr_check_expire(struct timer_list *t)
+{
+ struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
+ unsigned long now = jiffies;
+ int goal;
+ int i, j;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
+
+ if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+ /* do full expiration check */
+ ip_vs_lblcr_full_check(svc);
+ tbl->counter = 1;
+ goto out;
+ }
+
+ if (atomic_read(&tbl->entries) <= tbl->max_size) {
+ tbl->counter++;
+ goto out;
+ }
+
+ goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+ if (goal > tbl->max_size/2)
+ goal = tbl->max_size/2;
+
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
+ if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
+ continue;
+
+ ip_vs_lblcr_free(en);
+ atomic_dec(&tbl->entries);
+ goal--;
+ }
+ spin_unlock(&svc->sched_lock);
+ if (goal <= 0)
+ break;
+ }
+ tbl->rover = j;
+
+ out:
+ mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+ int i;
+ struct ip_vs_lblcr_table *tbl;
+
+ /*
+ * Allocate the ip_vs_lblcr_table for this service
+ */
+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+ if (tbl == NULL)
+ return -ENOMEM;
+
+ svc->sched_data = tbl;
+ IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) allocated for "
+ "current service\n", sizeof(*tbl));
+
+ /*
+ * Initialize the hash buckets
+ */
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
+ }
+ tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+ tbl->rover = 0;
+ tbl->counter = 1;
+ tbl->dead = false;
+ tbl->svc = svc;
+ atomic_set(&tbl->entries, 0);
+
+ /*
+ * Hook periodic timer for garbage collection
+ */
+ timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+ return 0;
+}
+
+
+static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+ /* remove periodic timer */
+ del_timer_sync(&tbl->periodic_timer);
+
+ /* got to clean up table entries here */
+ ip_vs_lblcr_flush(svc);
+
+ /* release the table itself */
+ kfree_rcu(tbl, rcu_head);
+ IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) released\n",
+ sizeof(*tbl));
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ /*
+ * We use the following formula to estimate the load:
+ * (dest overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connection.
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ if (atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = ip_vs_dest_conn_overhead(least);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /*
+ * Find the destination with the least load.
+ */
+ nextstage:
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ doh = ip_vs_dest_conn_overhead(dest);
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+
+/*
+ * If this destination server is overloaded and there is a less loaded
+ * server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+ if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+ struct ip_vs_dest *d;
+
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
+ if (atomic_read(&d->activeconns)*2
+ < atomic_read(&d->weight)) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection with Replication scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ struct ip_vs_dest *dest;
+ struct ip_vs_lblcr_entry *en;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ /* First look in our cache */
+ en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
+ if (en) {
+ en->lastuse = jiffies;
+
+ /* Get the least loaded destination */
+ dest = ip_vs_dest_set_min(&en->set);
+
+ /* More than one destination + enough time passed by, cleanup */
+ if (atomic_read(&en->set.size) > 1 &&
+ time_after(jiffies, en->set.lastmod +
+ sysctl_lblcr_expiration(svc))) {
+ spin_lock_bh(&svc->sched_lock);
+ if (atomic_read(&en->set.size) > 1) {
+ struct ip_vs_dest *m;
+
+ m = ip_vs_dest_set_max(&en->set);
+ if (m)
+ ip_vs_dest_set_erase(&en->set, m);
+ }
+ spin_unlock_bh(&svc->sched_lock);
+ }
+
+ /* If the destination is not overloaded, use it */
+ if (dest && !is_overloaded(dest, svc))
+ goto out;
+
+ /* The cache entry is invalid, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc);
+ if (!dest) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ /* Update our cache entry */
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_dest_set_insert(&en->set, dest, true);
+ spin_unlock_bh(&svc->sched_lock);
+ goto out;
+ }
+
+ /* No cache entry, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
+ spin_unlock_bh(&svc->sched_lock);
+
+out:
+ IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
+ IP_VS_DBG_ADDR(svc->af, &iph->daddr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
+
+ return dest;
+}
+
+
+/*
+ * IPVS LBLCR Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler = {
+ .name = "lblcr",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+ .init_service = ip_vs_lblcr_init_svc,
+ .done_service = ip_vs_lblcr_done_svc,
+ .schedule = ip_vs_lblcr_schedule,
+};
+
+/*
+ * per netns init.
+ */
+#ifdef CONFIG_SYSCTL
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return -ENOENT;
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblcr_ctl_table == NULL)
+ return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ ipvs->lblcr_ctl_table[0].procname = NULL;
+ } else
+ ipvs->lblcr_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
+ ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+ ipvs->lblcr_ctl_header =
+ register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
+ if (!ipvs->lblcr_ctl_header) {
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
+}
+
+#else
+
+static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
+
+#endif
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+ .init = __ip_vs_lblcr_init,
+ .exit = __ip_vs_lblcr_exit,
+};
+
+static int __init ip_vs_lblcr_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+ if (ret)
+ return ret;
+
+ ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ if (ret)
+ unregister_pernet_subsys(&ip_vs_lblcr_ops);
+ return ret;
+}
+
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ unregister_pernet_subsys(&ip_vs_lblcr_ops);
+ rcu_barrier();
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
new file mode 100644
index 000000000..9d34d81fc
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Least-Connection Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ * Wensong Zhang : added the ip_vs_lc_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *least = NULL;
+ unsigned int loh = 0, doh;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ /*
+	 * Simply select the server with the least connection overhead,
+	 * as computed by ip_vs_dest_conn_overhead(), except servers
+	 * whose weight is zero. A weight of zero means the server is
+	 * quiesced: existing connections to it are still served, but no
+	 * new connections are assigned to it.
+ */
+
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+ atomic_read(&dest->weight) == 0)
+ continue;
+ doh = ip_vs_dest_conn_overhead(dest);
+ if (!least || doh < loh) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ if (!least)
+ ip_vs_scheduler_err(svc, "no destination available");
+ else
+ IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
+ "inactconns %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->inactconns));
+
+ return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+ .name = "lc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+ .schedule = ip_vs_lc_schedule,
+};
+
+
+static int __init ip_vs_lc_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_lc_scheduler);
+}
+
+static void __exit ip_vs_lc_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
new file mode 100644
index 000000000..e3d7f5c87
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPVS: Maglev Hashing scheduling module
+ *
+ * Authors: Inju Song <inju.song@navercorp.com>
+ *
+ */
+
+/* The mh algorithm assigns a preference list of all the lookup
+ * table positions to each destination and populates the table with
+ * each destination's most-preferred positions. A destination is then
+ * selected by hashing the source IP address and looking up the
+ * resulting key in the lookup table.
+ *
+ * The algorithm is detailed in:
+ * [3.4 Consistent Hashing]
+ * https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
+ *
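+ * Concretely, each destination d walks the table along
+ *	perm_d(j) = (offset_d + j * skip_d) mod M,  M = IP_VS_MH_TAB_SIZE,
+ * claiming its next free preferred slot; destinations take turns,
+ * weighted by ->turns (last_weight scaled down by gcd and rshift).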
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+#include <linux/siphash.h>
+#include <linux/bitops.h>
+#include <linux/gcd.h>
+
+#define IP_VS_SVC_F_SCHED_MH_FALLBACK IP_VS_SVC_F_SCHED1 /* MH fallback */
+#define IP_VS_SVC_F_SCHED_MH_PORT IP_VS_SVC_F_SCHED2 /* MH use port */
+
+struct ip_vs_mh_lookup {
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
+};
+
+struct ip_vs_mh_dest_setup {
+ unsigned int offset; /* starting offset */
+ unsigned int skip; /* skip */
+ unsigned int perm; /* next_offset */
+ int turns; /* weight / gcd() and rshift */
+};
+
+/* Available prime numbers for MH table */
+static int primes[] = {251, 509, 1021, 2039, 4093,
+ 8191, 16381, 32749, 65521, 131071};
+
+/* For IPVS MH entry hash table */
+#ifndef CONFIG_IP_VS_MH_TAB_INDEX
+#define CONFIG_IP_VS_MH_TAB_INDEX 12
+#endif
+#define IP_VS_MH_TAB_BITS (CONFIG_IP_VS_MH_TAB_INDEX / 2)
+#define IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8)
+#define IP_VS_MH_TAB_SIZE primes[IP_VS_MH_TAB_INDEX]
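+
+/* With the default CONFIG_IP_VS_MH_TAB_INDEX of 12 this selects
+ * primes[4] = 4093 lookup slots and IP_VS_MH_TAB_BITS = 6.
+ */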
+
+struct ip_vs_mh_state {
+ struct rcu_head rcu_head;
+ struct ip_vs_mh_lookup *lookup;
+ struct ip_vs_mh_dest_setup *dest_setup;
+ hsiphash_key_t hash1, hash2;
+ int gcd;
+ int rshift;
+};
+
+static inline void generate_hash_secret(hsiphash_key_t *hash1,
+ hsiphash_key_t *hash2)
+{
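+	/* Fixed keys: 2654435761 is the 32-bit golden-ratio constant
+	 * (0x9e3779b1); the second key is simply another arbitrary
+	 * constant, giving two independent hash functions.
+	 */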
+ hash1->key[0] = 2654435761UL;
+ hash1->key[1] = 2654435761UL;
+
+ hash2->key[0] = 2654446892UL;
+ hash2->key[1] = 2654446892UL;
+}
+
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+ return atomic_read(&dest->weight) <= 0 ||
+ dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+/* Returns hash value for IPVS MH entry */
+static inline unsigned int
+ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
+ __be16 port, hsiphash_key_t *key, unsigned int offset)
+{
+ unsigned int v;
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
+ addr->ip6[2] ^ addr->ip6[3];
+#endif
+ v = (offset + ntohs(port) + ntohl(addr_fold));
+ return hsiphash(&v, sizeof(v), key);
+}
+
+/* Reset all the hash buckets of the specified table. */
+static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
+{
+ int i;
+ struct ip_vs_mh_lookup *l;
+ struct ip_vs_dest *dest;
+
+ l = &s->lookup[0];
+ for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
+ dest = rcu_dereference_protected(l->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(l->dest, NULL);
+ }
+ l++;
+ }
+}
+
+static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
+ struct ip_vs_service *svc)
+{
+ struct list_head *p;
+ struct ip_vs_mh_dest_setup *ds;
+ struct ip_vs_dest *dest;
+ int lw;
+
+	/* If gcd is smaller than 1, either there are no dests or all
+	 * dests have a last_weight of zero. So, skip the permutation
+	 * for the dests.
+ */
+ if (s->gcd < 1)
+ return 0;
+
+ /* Set dest_setup for the dests permutation */
+ p = &svc->destinations;
+ ds = &s->dest_setup[0];
+ while ((p = p->next) != &svc->destinations) {
+ dest = list_entry(p, struct ip_vs_dest, n_list);
+
+ ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
+ dest->port, &s->hash1, 0) %
+ IP_VS_MH_TAB_SIZE;
+ ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
+ dest->port, &s->hash2, 0) %
+ (IP_VS_MH_TAB_SIZE - 1) + 1;
+ ds->perm = ds->offset;
+
+ lw = atomic_read(&dest->last_weight);
+ ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
+ ds++;
+ }
+
+ return 0;
+}
+
+static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
+ struct ip_vs_service *svc)
+{
+ int n, c, dt_count;
+ unsigned long *table;
+ struct list_head *p;
+ struct ip_vs_mh_dest_setup *ds;
+ struct ip_vs_dest *dest, *new_dest;
+
+	/* If gcd is smaller than 1, either there are no dests or all
+	 * dests have a last_weight of zero. So, skip the population
+	 * for the dests and reset the lookup table.
+ */
+ if (s->gcd < 1) {
+ ip_vs_mh_reset(s);
+ return 0;
+ }
+
+ table = bitmap_zalloc(IP_VS_MH_TAB_SIZE, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ p = &svc->destinations;
+ n = 0;
+ dt_count = 0;
+ while (n < IP_VS_MH_TAB_SIZE) {
+ if (p == &svc->destinations)
+ p = p->next;
+
+ ds = &s->dest_setup[0];
+ while (p != &svc->destinations) {
+ /* Ignore added server with zero weight */
+ if (ds->turns < 1) {
+ p = p->next;
+ ds++;
+ continue;
+ }
+
+ c = ds->perm;
+ while (test_bit(c, table)) {
+ /* Add skip, mod IP_VS_MH_TAB_SIZE */
+ ds->perm += ds->skip;
+ if (ds->perm >= IP_VS_MH_TAB_SIZE)
+ ds->perm -= IP_VS_MH_TAB_SIZE;
+ c = ds->perm;
+ }
+
+ __set_bit(c, table);
+
+ dest = rcu_dereference_protected(s->lookup[c].dest, 1);
+ new_dest = list_entry(p, struct ip_vs_dest, n_list);
+ if (dest != new_dest) {
+ if (dest)
+ ip_vs_dest_put(dest);
+ ip_vs_dest_hold(new_dest);
+ RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
+ }
+
+ if (++n == IP_VS_MH_TAB_SIZE)
+ goto out;
+
+ if (++dt_count >= ds->turns) {
+ dt_count = 0;
+ p = p->next;
+ ds++;
+ }
+ }
+ }
+
+out:
+ bitmap_free(table);
+ return 0;
+}
+
+/* Get ip_vs_dest associated with supplied parameters. */
+static inline struct ip_vs_dest *
+ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
+ const union nf_inet_addr *addr, __be16 port)
+{
+ unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
+ % IP_VS_MH_TAB_SIZE;
+ struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
+
+ return (!dest || is_unavailable(dest)) ? NULL : dest;
+}
+
+/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
+static inline struct ip_vs_dest *
+ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
+ const union nf_inet_addr *addr, __be16 port)
+{
+ unsigned int offset, roffset;
+ unsigned int hash, ihash;
+ struct ip_vs_dest *dest;
+
+ /* First try the dest it's supposed to go to */
+ ihash = ip_vs_mh_hashkey(svc->af, addr, port,
+ &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
+ dest = rcu_dereference(s->lookup[ihash].dest);
+ if (!dest)
+ return NULL;
+ if (!is_unavailable(dest))
+ return dest;
+
+ IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
+
+ /* If the original dest is unavailable, loop around the table
+ * starting from ihash to find a new dest
+ */
+ for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
+ roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
+ hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
+ roffset) % IP_VS_MH_TAB_SIZE;
+ dest = rcu_dereference(s->lookup[hash].dest);
+ if (!dest)
+ break;
+ if (!is_unavailable(dest))
+ return dest;
+ IP_VS_DBG_BUF(6,
+ "MH: selected unavailable server %s:%u (offset %u), reselecting",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port), roffset);
+ }
+
+ return NULL;
+}
+
+/* Assign all the hash buckets of the specified table with the service. */
+static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
+ struct ip_vs_service *svc)
+{
+ int ret;
+
+ if (svc->num_dests > IP_VS_MH_TAB_SIZE)
+ return -EINVAL;
+
+ if (svc->num_dests >= 1) {
+ s->dest_setup = kcalloc(svc->num_dests,
+ sizeof(struct ip_vs_mh_dest_setup),
+ GFP_KERNEL);
+ if (!s->dest_setup)
+ return -ENOMEM;
+ }
+
+ ip_vs_mh_permutate(s, svc);
+
+ ret = ip_vs_mh_populate(s, svc);
+ if (ret < 0)
+ goto out;
+
+ IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n",
+ IP_VS_DBG_ADDR(svc->af, &svc->addr),
+ ntohs(svc->port));
+
+out:
+ if (svc->num_dests >= 1) {
+ kfree(s->dest_setup);
+ s->dest_setup = NULL;
+ }
+ return ret;
+}
+
+static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest;
+ int weight;
+ int g = 0;
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ weight = atomic_read(&dest->last_weight);
+ if (weight > 0) {
+ if (g > 0)
+ g = gcd(weight, g);
+ else
+ g = weight;
+ }
+ }
+ return g;
+}
+
+/* To avoid assigning huge turn counts in the MH table,
+ * calculate a shift value from the maximum weight and the gcd.
+ */
+static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
+{
+ struct ip_vs_dest *dest;
+ int new_weight, weight = 0;
+ int mw, shift;
+
+	/* If gcd is smaller than 1, either there are no dests or all
+	 * dests have a last_weight of zero. So, return a shift value
+	 * of zero.
+ */
+ if (gcd < 1)
+ return 0;
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ new_weight = atomic_read(&dest->last_weight);
+ if (new_weight > weight)
+ weight = new_weight;
+ }
+
+	/* Because gcd is greater than zero, at least one dest has a
+	 * positive last_weight, so the maximum weight is also greater
+	 * than zero and mw is well defined.
+ */
+ mw = weight / gcd;
+
+ /* shift = occupied bits of weight/gcd - MH highest bits */
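+	/* e.g. mw = 1000 occupies 10 bits, so with 6 table bits the
+	 * turn counts are shifted down by 4 (divided by 16).
+	 */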
+ shift = fls(mw) - IP_VS_MH_TAB_BITS;
+ return (shift >= 0) ? shift : 0;
+}
+
+static void ip_vs_mh_state_free(struct rcu_head *head)
+{
+ struct ip_vs_mh_state *s;
+
+ s = container_of(head, struct ip_vs_mh_state, rcu_head);
+ kfree(s->lookup);
+ kfree(s);
+}
+
+static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
+{
+ int ret;
+ struct ip_vs_mh_state *s;
+
+ /* Allocate the MH table for this service */
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
+ GFP_KERNEL);
+ if (!s->lookup) {
+ kfree(s);
+ return -ENOMEM;
+ }
+
+ generate_hash_secret(&s->hash1, &s->hash2);
+ s->gcd = ip_vs_mh_gcd_weight(svc);
+ s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
+
+ IP_VS_DBG(6,
+ "MH lookup table (memory=%zdbytes) allocated for current service\n",
+ sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
+
+ /* Assign the lookup table with current dests */
+ ret = ip_vs_mh_reassign(s, svc);
+ if (ret < 0) {
+ ip_vs_mh_reset(s);
+ ip_vs_mh_state_free(&s->rcu_head);
+ return ret;
+ }
+
+ /* No more failures, attach state */
+ svc->sched_data = s;
+ return 0;
+}
+
+static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_mh_state *s = svc->sched_data;
+
+	/* We must clean up the lookup entries here */
+ ip_vs_mh_reset(s);
+
+ call_rcu(&s->rcu_head, ip_vs_mh_state_free);
+ IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
+ sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
+}
+
+static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_mh_state *s = svc->sched_data;
+
+ s->gcd = ip_vs_mh_gcd_weight(svc);
+ s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
+
+ /* Assign the lookup table with the updated service */
+ return ip_vs_mh_reassign(s, svc);
+}
+
+/* Helper function to get port number */
+static inline __be16
+ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
+{
+ __be16 _ports[2], *ports;
+
+ /* At this point we know that we have a valid packet of some kind.
+ * Because ICMP packets are only guaranteed to have the first 8
+ * bytes, let's just grab the ports. Fortunately they're in the
+ * same position for all three of the protocols we care about.
+ */
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+ &_ports);
+ if (unlikely(!ports))
+ return 0;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ return ports[0];
+ else
+ return ports[1];
+ default:
+ return 0;
+ }
+}
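
The comment above relies on TCP, UDP and SCTP all carrying the source and destination ports in the first 4 bytes of the transport header, which is why one __be16 pair read suffices. A quick sketch with self-defined minimal headers (these mirror, but are not, the kernel's struct tcphdr/udphdr/sctphdr):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* minimal illustrative headers; leading fields only */
struct toy_tcphdr  { uint16_t source, dest; uint32_t seq; };
struct toy_udphdr  { uint16_t source, dest, len, check; };
struct toy_sctphdr { uint16_t source, dest; uint32_t vtag, checksum; };

int main(void)
{
	/* source at offset 0 and dest at offset 2 in all three cases,
	 * so reading __be16 _ports[2] right after the IP header works
	 * for every protocol this scheduler cares about
	 */
	printf("tcp  src@%zu dst@%zu\n", offsetof(struct toy_tcphdr, source),
	       offsetof(struct toy_tcphdr, dest));
	printf("udp  src@%zu dst@%zu\n", offsetof(struct toy_udphdr, source),
	       offsetof(struct toy_udphdr, dest));
	printf("sctp src@%zu dst@%zu\n", offsetof(struct toy_sctphdr, source),
	       offsetof(struct toy_sctphdr, dest));
	return 0;
}
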
+
+/* Maglev Hashing scheduling */
+static struct ip_vs_dest *
+ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_mh_state *s;
+ __be16 port = 0;
+ const union nf_inet_addr *hash_addr;
+
+ hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
+
+	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT)
+ port = ip_vs_mh_get_port(skb, iph);
+
+ s = (struct ip_vs_mh_state *)svc->sched_data;
+
+ if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK)
+ dest = ip_vs_mh_get_fallback(svc, s, hash_addr, port);
+ else
+ dest = ip_vs_mh_get(svc, s, hash_addr, port);
+
+ if (!dest) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n",
+ IP_VS_DBG_ADDR(svc->af, hash_addr),
+ ntohs(port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port));
+
+ return dest;
+}
+
+/* IPVS MH Scheduler structure */
+static struct ip_vs_scheduler ip_vs_mh_scheduler = {
+ .name = "mh",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
+ .init_service = ip_vs_mh_init_svc,
+ .done_service = ip_vs_mh_done_svc,
+ .add_dest = ip_vs_mh_dest_changed,
+ .del_dest = ip_vs_mh_dest_changed,
+ .upd_dest = ip_vs_mh_dest_changed,
+ .schedule = ip_vs_mh_schedule,
+};
+
+static int __init ip_vs_mh_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_mh_scheduler);
+}
+
+static void __exit ip_vs_mh_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
+ rcu_barrier();
+}
+
+module_init(ip_vs_mh_init);
+module_exit(ip_vs_mh_cleanup);
+MODULE_DESCRIPTION("Maglev hashing ipvs scheduler");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>");
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
new file mode 100644
index 000000000..08adcb222
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
+ *
+ * Portions Copyright (C) 2001-2002
+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
+ *
+ * Portions Copyright (C) 2003-2010
+ * Julian Anastasov
+ *
+ * Authors:
+ * Ben North <ben@redfrontdoor.org>
+ * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
+ * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
+ *
+ * Current status:
+ *
+ * - provide conntrack confirmation for new and related connections, by
+ * this way we can see their proper conntrack state in all hooks
+ * - support for all forwarding methods, not only NAT
+ * - FTP support (NAT), ability to support other NAT apps with expectations
+ * - to correctly create expectations for related NAT connections the proper
+ *   NF conntrack support must already be installed, e.g. ip_vs_ftp requires
+ * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
+ * NAT rules are needed)
+ * - alter reply for NAT when forwarding packet in original direction:
+ * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
+ * when RELATED conntrack is created from real server (Active FTP DATA)
+ * - if iptables_nat is not loaded, Passive FTP will not work (the
+ *   PASV response cannot be NAT-ed) but Active FTP should work
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ip_vs.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+
+#define FMT_TUPLE "%s:%u->%s:%u/%u"
+#define ARG_TUPLE(T) IP_VS_DBG_ADDR((T)->src.l3num, &(T)->src.u3), \
+ ntohs((T)->src.u.all), \
+ IP_VS_DBG_ADDR((T)->src.l3num, &(T)->dst.u3), \
+ ntohs((T)->dst.u.all), \
+ (T)->dst.protonum
+
+#define FMT_CONN "%s:%u->%s:%u->%s:%u/%u:%u"
+#define ARG_CONN(C) IP_VS_DBG_ADDR((C)->af, &((C)->caddr)), \
+ ntohs((C)->cport), \
+ IP_VS_DBG_ADDR((C)->af, &((C)->vaddr)), \
+ ntohs((C)->vport), \
+ IP_VS_DBG_ADDR((C)->daf, &((C)->daddr)), \
+ ntohs((C)->dport), \
+ (C)->protocol, (C)->state
+
+void
+ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conntrack_tuple new_tuple;
+
+ if (ct == NULL || nf_ct_is_confirmed(ct) ||
+ nf_ct_is_dying(ct))
+ return;
+
+ /* Never alter conntrack for non-NAT conns */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ return;
+
+ /* Never alter conntrack for OPS conns (no reply is expected) */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
+ /* Alter reply only in original direction */
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ return;
+
+ /* Applications may adjust TCP seqs */
+ if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP &&
+ !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct))
+ return;
+
+ /*
+ * The connection is not yet in the hashtable, so we update it.
+ * CIP->VIP will remain the same, so leave the tuple in
+ * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
+ * real-server we will see RIP->DIP.
+ */
+ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ /*
+ * This will also take care of UDP and other protocols.
+ */
+ if (outin) {
+ new_tuple.src.u3 = cp->daddr;
+ if (new_tuple.dst.protonum != IPPROTO_ICMP &&
+ new_tuple.dst.protonum != IPPROTO_ICMPV6)
+ new_tuple.src.u.tcp.port = cp->dport;
+ } else {
+ new_tuple.dst.u3 = cp->vaddr;
+ if (new_tuple.dst.protonum != IPPROTO_ICMP &&
+ new_tuple.dst.protonum != IPPROTO_ICMPV6)
+ new_tuple.dst.u.tcp.port = cp->vport;
+ }
+ IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
+ "ctinfo=%d, old reply=" FMT_TUPLE "\n",
+ __func__, ct, ct->status, ctinfo,
+ ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple));
+ IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
+ "ctinfo=%d, new reply=" FMT_TUPLE "\n",
+ __func__, ct, ct->status, ctinfo,
+ ARG_TUPLE(&new_tuple));
+ nf_conntrack_alter_reply(ct, &new_tuple);
+ IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n",
+ __func__, ct, ARG_CONN(cp));
+}
+
+int ip_vs_confirm_conntrack(struct sk_buff *skb)
+{
+ return nf_conntrack_confirm(skb);
+}
+
+/*
+ * Called from init_conntrack() as expectfn handler.
+ */
+static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_conntrack_tuple *orig, new_reply;
+ struct ip_vs_conn *cp;
+ struct ip_vs_conn_param p;
+ struct net *net = nf_ct_net(ct);
+
+ /*
+ * We assume that no NF locks are held before this callback.
+ * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
+ * expectations even if they use wildcard values, now we provide the
+ * actual values from the newly created original conntrack direction.
+ * The conntrack is confirmed when packet reaches IPVS hooks.
+ */
+
+ /* RS->CLIENT */
+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
+ &orig->src.u3, orig->src.u.tcp.port,
+ &orig->dst.u3, orig->dst.u.tcp.port, &p);
+ cp = ip_vs_conn_out_get(&p);
+ if (cp) {
+ /* Change reply CLIENT->RS to CLIENT->VS */
+ IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp="
+ FMT_CONN "\n",
+ __func__, ct, ct->status, ARG_CONN(cp));
+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&new_reply));
+ new_reply.dst.u3 = cp->vaddr;
+ new_reply.dst.u.tcp.port = cp->vport;
+ goto alter;
+ }
+
+ /* CLIENT->VS */
+ cp = ip_vs_conn_in_get(&p);
+ if (cp) {
+ /* Change reply VS->CLIENT to RS->CLIENT */
+ IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp="
+ FMT_CONN "\n",
+ __func__, ct, ct->status, ARG_CONN(cp));
+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&new_reply));
+ new_reply.src.u3 = cp->daddr;
+ new_reply.src.u.tcp.port = cp->dport;
+ goto alter;
+ }
+
+ IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
+ " - unknown expect\n",
+ __func__, ct, ct->status, ARG_TUPLE(orig));
+ return;
+
+alter:
+ /* Never alter conntrack for non-NAT conns */
+ if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
+ nf_conntrack_alter_reply(ct, &new_reply);
+ ip_vs_conn_put(cp);
+ return;
+}
+
+/*
+ * Create NF conntrack expectation with wildcard (optional) source port.
+ * Then the default callback function will alter the reply and will confirm
+ * the conntrack entry when the first packet comes.
+ * Use port 0 to expect connection from any port.
+ */
+void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
+ struct ip_vs_conn *cp, u_int8_t proto,
+ const __be16 port, int from_rs)
+{
+ struct nf_conntrack_expect *exp;
+
+ if (ct == NULL)
+ return;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (!exp)
+ return;
+
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ from_rs ? &cp->daddr : &cp->caddr,
+ from_rs ? &cp->caddr : &cp->vaddr,
+ proto, port ? &port : NULL,
+ from_rs ? &cp->cport : &cp->vport);
+
+ exp->expectfn = ip_vs_nfct_expect_callback;
+
+ IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&exp->tuple));
+ nf_ct_expect_related(exp, 0);
+ nf_ct_expect_put(exp);
+}
+EXPORT_SYMBOL(ip_vs_nfct_expect_related);
+
+/*
+ * Our connection was terminated, try to drop the conntrack immediately
+ */
+void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct nf_conntrack_tuple tuple;
+
+ if (!cp->cport)
+ return;
+
+ tuple = (struct nf_conntrack_tuple) {
+ .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
+ tuple.src.u3 = cp->caddr;
+ tuple.src.u.all = cp->cport;
+ tuple.src.l3num = cp->af;
+ tuple.dst.u3 = cp->vaddr;
+ tuple.dst.u.all = cp->vport;
+
+ IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n",
+ __func__, ARG_CONN(cp));
+
+ h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
+ if (h) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_kill(ct)) {
+ IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&tuple));
+ } else {
+ IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&tuple));
+ }
+ nf_ct_put(ct);
+ } else {
+ IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
+ __func__, ARG_TUPLE(&tuple));
+ }
+}
+
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
new file mode 100644
index 000000000..f56862a87
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Never Queue scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimizes its expected delay (the Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for telling me about NQ.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+ /*
+ * We only use the active connection number in the cost
+ * calculation here.
+ */
+ return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Never Queue scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *least = NULL;
+ int loh = 0, doh;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (server expected overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connections.
+ */
+
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
+ !atomic_read(&dest->weight))
+ continue;
+
+ doh = ip_vs_nq_dest_overhead(dest);
+
+ /* return the server directly if it is idle */
+ if (atomic_read(&dest->activeconns) == 0) {
+ least = dest;
+ loh = doh;
+ goto out;
+ }
+
+ if (!least ||
+ ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ if (!least) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ out:
+ IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
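
The cross-multiplied comparison in the loop above (loh*w2 > doh*w1) orders overhead/weight ratios without floating point, which kernel code cannot use. The same selection rule as a userspace sketch with invented dests:

#include <stdio.h>

struct toy_dest { const char *name; long long weight, overhead; };

int main(void)
{
	struct toy_dest d[] = {
		{ "rs1", 3, 10 },	/* 10/3 = 3.33 */
		{ "rs2", 2,  5 },	/* 5/2  = 2.5, least loaded */
		{ "rs3", 1,  4 },	/* 4/1  = 4.0  */
	};
	struct toy_dest *least = NULL;
	int i;

	for (i = 0; i < 3; i++) {
		/* loh/lw > doh/dw  <=>  loh*dw > doh*lw  (weights > 0) */
		if (!least ||
		    least->overhead * d[i].weight >
		    d[i].overhead * least->weight)
			least = &d[i];
	}
	printf("selected %s\n", least->name);	/* prints "selected rs2" */
	return 0;
}
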
+
+
+static struct ip_vs_scheduler ip_vs_nq_scheduler = {
+ .name = "nq",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+ .schedule = ip_vs_nq_schedule,
+};
+
+
+static int __init ip_vs_nq_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+static void __exit ip_vs_nq_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
new file mode 100644
index 000000000..c03066fdd
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Overflow-Connection Scheduling module
+ *
+ * Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
+ *
+ * The scheduler implements "overflow" load balancing according to the number
+ * of active connections: it keeps all connections on the node with the
+ * highest weight and overflows to the next node if the number of connections
+ * exceeds the node's weight.
+ * Note that this scheduler might not be suitable for UDP because it only uses
+ * active connections.
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* OVF Connection scheduling */
+static struct ip_vs_dest *
+ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *h = NULL;
+ int hw = 0, w;
+
+ IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");
+ /* select the node with highest weight, go to next in line if active
+ * connections exceed weight
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ w = atomic_read(&dest->weight);
+ if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+ atomic_read(&dest->activeconns) > w ||
+ w == 0)
+ continue;
+ if (!h || w > hw) {
+ h = dest;
+ hw = w;
+ }
+ }
+
+ if (h) {
+ IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
+ IP_VS_DBG_ADDR(h->af, &h->addr),
+ ntohs(h->port),
+ atomic_read(&h->activeconns),
+ atomic_read(&h->weight));
+ return h;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
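
The same selection rule as a userspace sketch: pick the heaviest node whose active connection count does not exceed its weight (node data invented for illustration):

#include <stdio.h>

struct toy_node { const char *name; int weight, active; };

int main(void)
{
	struct toy_node n[] = {
		{ "rs1", 100, 120 },	/* over its weight: skipped */
		{ "rs2",  80,  30 },	/* eligible, weight 80 */
		{ "rs3",  90,  90 },	/* eligible, weight 90: wins */
	};
	struct toy_node *h = NULL;
	int i, hw = 0;

	for (i = 0; i < 3; i++) {
		int w = n[i].weight;

		if (n[i].active > w || w == 0)
			continue;
		if (!h || w > hw) {
			h = &n[i];
			hw = w;
		}
	}
	printf("%s\n", h ? h->name : "none");	/* prints "rs3" */
	return 0;
}
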
+
+static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
+ .name = "ovf",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
+ .schedule = ip_vs_ovf_schedule,
+};
+
+static int __init ip_vs_ovf_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+}
+
+static void __exit ip_vs_ovf_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_ovf_init);
+module_exit(ip_vs_ovf_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
new file mode 100644
index 000000000..166c669f0
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+/* IPVS pe list */
+static LIST_HEAD(ip_vs_pe);
+
+/* semaphore for IPVS PEs. */
+static DEFINE_MUTEX(ip_vs_pe_mutex);
+
+/* Get pe in the pe list by name */
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
+{
+ struct ip_vs_pe *pe;
+
+ IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
+ pe_name);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
+ /* Test and get the modules atomically */
+ if (pe->module &&
+ !try_module_get(pe->module)) {
+ /* This pe is just deleted */
+ continue;
+ }
+		if (strcmp(pe_name, pe->name) == 0) {
+ /* HIT */
+ rcu_read_unlock();
+ return pe;
+ }
+ module_put(pe->module);
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+/* Lookup pe and try to load it if it doesn't exist */
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
+{
+ struct ip_vs_pe *pe;
+
+ /* Search for the pe by name */
+ pe = __ip_vs_pe_getbyname(name);
+
+ /* If pe not found, load the module and search again */
+ if (!pe) {
+ request_module("ip_vs_pe_%s", name);
+ pe = __ip_vs_pe_getbyname(name);
+ }
+
+ return pe;
+}
+
+/* Register a pe in the pe list */
+int register_ip_vs_pe(struct ip_vs_pe *pe)
+{
+ struct ip_vs_pe *tmp;
+
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc())
+ return -ENOENT;
+
+ mutex_lock(&ip_vs_pe_mutex);
+ /* Make sure that the pe with this name doesn't exist
+ * in the pe list.
+ */
+ list_for_each_entry(tmp, &ip_vs_pe, n_list) {
+ if (strcmp(tmp->name, pe->name) == 0) {
+ mutex_unlock(&ip_vs_pe_mutex);
+ ip_vs_use_count_dec();
+			pr_err("%s(): [%s] pe already exists "
+			       "in the system\n", __func__, pe->name);
+ return -EINVAL;
+ }
+ }
+	/* Add it into the doubly-linked pe list */
+ list_add_rcu(&pe->n_list, &ip_vs_pe);
+ mutex_unlock(&ip_vs_pe_mutex);
+
+ pr_info("[%s] pe registered.\n", pe->name);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_ip_vs_pe);
+
+/* Unregister a pe from the pe list */
+int unregister_ip_vs_pe(struct ip_vs_pe *pe)
+{
+ mutex_lock(&ip_vs_pe_mutex);
+	/* Remove it from the doubly-linked pe list */
+ list_del_rcu(&pe->n_list);
+ mutex_unlock(&ip_vs_pe_mutex);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+
+ pr_info("[%s] pe unregistered.\n", pe->name);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
new file mode 100644
index 000000000..0ac6705a6
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+#ifdef CONFIG_IP_VS_DEBUG
+static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
+ const char *callid, size_t callid_len,
+ int *idx)
+{
+ size_t max_len = 64;
+	size_t len = min3(max_len, callid_len, buf_len - *idx - 1);
+
+	memcpy(buf + *idx, callid, len);
+	buf[*idx + len] = '\0';
+ *idx += len + 1;
+ return buf + *idx - len;
+}
+
+#define IP_VS_DEBUG_CALLID(callid, len) \
+ ip_vs_dbg_callid(ip_vs_dbg_buf, sizeof(ip_vs_dbg_buf), \
+ callid, len, &ip_vs_dbg_idx)
+#endif
+
+static int get_callid(const char *dptr, unsigned int dataoff,
+ unsigned int datalen,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ /* Find callid */
+ while (1) {
+ int ret = ct_sip_get_header(NULL, dptr, dataoff, datalen,
+ SIP_HDR_CALL_ID, matchoff,
+ matchlen);
+ if (ret > 0)
+ break;
+ if (!ret)
+ return -EINVAL;
+ dataoff += *matchoff;
+ }
+
+ /* Too large is useless */
+ if (*matchlen > IP_VS_PEDATA_MAXLEN)
+ return -EINVAL;
+
+ /* SIP headers are always followed by a line terminator */
+ if (*matchoff + *matchlen == datalen)
+ return -EINVAL;
+
+ /* RFC 2543 allows lines to be terminated with CR, LF or CRLF,
+ * RFC 3261 allows only CRLF, we support both. */
+ if (*(dptr + *matchoff + *matchlen) != '\r' &&
+ *(dptr + *matchoff + *matchlen) != '\n')
+ return -EINVAL;
+
+ IP_VS_DBG_BUF(9, "SIP callid %s (%d bytes)\n",
+ IP_VS_DEBUG_CALLID(dptr + *matchoff, *matchlen),
+ *matchlen);
+ return 0;
+}
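
For illustration only, a much-simplified userspace scanner for the Call-ID header; the kernel delegates the real parsing to ct_sip_get_header() from nf_conntrack_sip, and this toy version handles only the literal "Call-ID:" spelling:

#include <stdio.h>
#include <string.h>

static int toy_get_callid(const char *msg, const char **id, size_t *len)
{
	const char *h = strstr(msg, "\r\nCall-ID:");
	const char *end;

	if (!h)
		return -1;
	h += strlen("\r\nCall-ID:");
	while (*h == ' ')
		h++;
	end = strpbrk(h, "\r\n");	/* headers end in CR, LF or CRLF */
	if (!end)
		return -1;
	*id = h;
	*len = (size_t)(end - h);
	return 0;
}

int main(void)
{
	const char *msg =
		"INVITE sip:bob@example.org SIP/2.0\r\n"
		"Call-ID: a84b4c76e66710@pc33.example.org\r\n"
		"CSeq: 314159 INVITE\r\n\r\n";
	const char *id;
	size_t len;

	if (!toy_get_callid(msg, &id, &len))
		printf("callid: %.*s (%zu bytes)\n", (int)len, id, len);
	return 0;
}
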
+
+static int
+ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
+{
+ struct ip_vs_iphdr iph;
+ unsigned int dataoff, datalen, matchoff, matchlen;
+ const char *dptr;
+ int retc;
+
+ retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+
+ /* Only useful with UDP */
+ if (!retc || iph.protocol != IPPROTO_UDP)
+ return -EINVAL;
+ /* todo: IPv6 fragments:
+	 *       I think this should only be done for the first fragment. /HS
+ */
+ dataoff = iph.len + sizeof(struct udphdr);
+
+ if (dataoff >= skb->len)
+ return -EINVAL;
+ retc = skb_linearize(skb);
+ if (retc < 0)
+ return retc;
+ dptr = skb->data + dataoff;
+ datalen = skb->len - dataoff;
+
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
+ return -EINVAL;
+
+ /* N.B: pe_data is only set on success,
+ * this allows fallback to the default persistence logic on failure
+ */
+ p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
+ if (!p->pe_data)
+ return -ENOMEM;
+
+ p->pe_data_len = matchlen;
+
+ return 0;
+}
+
+static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
+			       struct ip_vs_conn *ct)
+{
+ bool ret = false;
+
+ if (ct->af == p->af &&
+ ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
+ /* protocol should only be IPPROTO_IP if
+ * d_addr is a fwmark */
+ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+ p->vaddr, &ct->vaddr) &&
+ ct->vport == p->vport &&
+ ct->flags & IP_VS_CONN_F_TEMPLATE &&
+ ct->protocol == p->protocol &&
+ ct->pe_data && ct->pe_data_len == p->pe_data_len &&
+ !memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
+ ret = true;
+
+ IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
+ ip_vs_proto_name(p->protocol),
+ IP_VS_DEBUG_CALLID(p->pe_data, p->pe_data_len),
+ IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
+ ret ? "hit" : "not hit");
+
+ return ret;
+}
+
+static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p,
+ u32 initval, bool inverse)
+{
+ return jhash(p->pe_data, p->pe_data_len, initval);
+}
+
+static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
+{
+ memcpy(buf, cp->pe_data, cp->pe_data_len);
+ return cp->pe_data_len;
+}
+
+static struct ip_vs_conn *
+ip_vs_sip_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ if (likely(iph->protocol == IPPROTO_UDP))
+ return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
+ /* currently no need to handle other than UDP */
+ return NULL;
+	/* currently no need to handle protocols other than UDP */
+
+static struct ip_vs_pe ip_vs_sip_pe = {
+ .name = "sip",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sip_pe.n_list),
+ .fill_param = ip_vs_sip_fill_param,
+ .ct_match = ip_vs_sip_ct_match,
+ .hashkey_raw = ip_vs_sip_hashkey_raw,
+ .show_pe_data = ip_vs_sip_show_pe_data,
+ .conn_out = ip_vs_sip_conn_out,
+};
+
+static int __init ip_vs_sip_init(void)
+{
+ return register_ip_vs_pe(&ip_vs_sip_pe);
+}
+
+static void __exit ip_vs_sip_cleanup(void)
+{
+ unregister_ip_vs_pe(&ip_vs_sip_pe);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_sip_init);
+module_exit(ip_vs_sip_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
new file mode 100644
index 000000000..f100da4ba
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_proto.c: transport protocol load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * Changes:
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/gfp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS protocols can only be registered/unregistered when the ipvs
+ * module is loaded/unloaded, so no lock is needed in accessing the
+ * ipvs protocol table.
+ */
+
+#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
+#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1))
+
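
With a table size of 32, the hash is just the low five bits of the protocol number, so the protocols IPVS cares about land in distinct buckets. A quick sketch using the IANA-assigned protocol numbers:

#include <stdio.h>

#define TAB_SIZE 32
#define PROTO_HASH(p) ((p) & (TAB_SIZE - 1))

int main(void)
{
	/* IANA protocol numbers: TCP=6, UDP=17, ESP=50, AH=51, SCTP=132 */
	int protos[] = { 6, 17, 50, 51, 132 };
	int i;

	/* prints buckets 6, 17, 18, 19 and 4: no collisions */
	for (i = 0; i < 5; i++)
		printf("proto %3d -> bucket %2d\n",
		       protos[i], PROTO_HASH(protos[i]));
	return 0;
}
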
+static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+
+/* States for conn templates: NONE or words separated with ",", max 15 chars */
+static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
+ [IP_VS_CTPL_S_NONE] = "NONE",
+ [IP_VS_CTPL_S_ASSURED] = "ASSURED",
+};
+
+/*
+ * register an ipvs protocol
+ */
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
+
+ pp->next = ip_vs_proto_table[hash];
+ ip_vs_proto_table[hash] = pp;
+
+ if (pp->init != NULL)
+ pp->init(pp);
+
+ return 0;
+}
+
+/*
+ * register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
+{
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
+ struct ip_vs_proto_data *pd =
+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
+
+ if (!pd)
+ return -ENOMEM;
+
+ pd->pp = pp; /* For speed issues */
+ pd->next = ipvs->proto_data_table[hash];
+ ipvs->proto_data_table[hash] = pd;
+ atomic_set(&pd->appcnt, 0); /* Init app counter */
+
+ if (pp->init_netns != NULL) {
+ int ret = pp->init_netns(ipvs, pd);
+ if (ret) {
+			/* unlink and free proto data */
+ ipvs->proto_data_table[hash] = pd->next;
+ kfree(pd);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * unregister an ipvs protocol
+ */
+static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+ struct ip_vs_protocol **pp_p;
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
+
+ pp_p = &ip_vs_proto_table[hash];
+ for (; *pp_p; pp_p = &(*pp_p)->next) {
+ if (*pp_p == pp) {
+ *pp_p = pp->next;
+ if (pp->exit != NULL)
+ pp->exit(pp);
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+/*
+ * unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ struct ip_vs_proto_data **pd_p;
+ unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+ pd_p = &ipvs->proto_data_table[hash];
+ for (; *pd_p; pd_p = &(*pd_p)->next) {
+ if (*pd_p == pd) {
+ *pd_p = pd->next;
+ if (pd->pp->exit_netns != NULL)
+ pd->pp->exit_netns(ipvs, pd);
+ kfree(pd);
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+/*
+ * get ip_vs_protocol object by its proto.
+ */
+struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
+{
+ struct ip_vs_protocol *pp;
+ unsigned int hash = IP_VS_PROTO_HASH(proto);
+
+ for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
+ if (pp->protocol == proto)
+ return pp;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(ip_vs_proto_get);
+
+/*
+ * get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+ struct ip_vs_proto_data *pd;
+ unsigned int hash = IP_VS_PROTO_HASH(proto);
+
+ for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+ if (pd->pp->protocol == proto)
+ return pd;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
+
+/*
+ * Propagate event for state change to all protocols
+ */
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
+{
+ struct ip_vs_proto_data *pd;
+ int i;
+
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+ if (pd->pp->timeout_change)
+ pd->pp->timeout_change(pd, flags);
+ }
+ }
+}
+
+
+int *
+ip_vs_create_timeout_table(int *table, int size)
+{
+ return kmemdup(table, size, GFP_KERNEL);
+}
+
+
+const char *ip_vs_state_name(const struct ip_vs_conn *cp)
+{
+ unsigned int state = cp->state;
+ struct ip_vs_protocol *pp;
+
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+		if (state >= IP_VS_CTPL_S_LAST)
+ return "ERR!";
+ return ip_vs_ctpl_state_name_table[state] ? : "?";
+ }
+ pp = ip_vs_proto_get(cp->protocol);
+ if (pp == NULL || pp->state_name == NULL)
+ return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
+ return pp->state_name(state);
+}
+
+
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+ char buf[128];
+ struct iphdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
+ sprintf(buf, "TRUNCATED");
+ else if (ih->frag_off & htons(IP_OFFSET))
+ sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
+ else {
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ sprintf(buf, "TRUNCATED %pI4->%pI4",
+ &ih->saddr, &ih->daddr);
+ else
+ sprintf(buf, "%pI4:%u->%pI4:%u",
+ &ih->saddr, ntohs(pptr[0]),
+ &ih->daddr, ntohs(pptr[1]));
+ }
+
+ pr_debug("%s: %s %s\n", msg, pp->name, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+ char buf[192];
+ struct ipv6hdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
+ sprintf(buf, "TRUNCATED");
+ else if (ih->nexthdr == IPPROTO_FRAGMENT)
+ sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
+ else {
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ sprintf(buf, "TRUNCATED %pI6c->%pI6c",
+ &ih->saddr, &ih->daddr);
+ else
+ sprintf(buf, "%pI6c:%u->%pI6c:%u",
+ &ih->saddr, ntohs(pptr[0]),
+ &ih->daddr, ntohs(pptr[1]));
+ }
+
+ pr_debug("%s: %s %s\n", msg, pp->name, buf);
+}
+#endif
+
+
+void
+ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+ else
+#endif
+ ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+/*
+ * per network name-space init
+ */
+int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
+{
+ int i, ret;
+ static struct ip_vs_protocol *protos[] = {
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ &ip_vs_protocol_tcp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ &ip_vs_protocol_udp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ &ip_vs_protocol_sctp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ &ip_vs_protocol_ah,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ &ip_vs_protocol_esp,
+#endif
+ };
+
+ for (i = 0; i < ARRAY_SIZE(protos); i++) {
+ ret = register_ip_vs_proto_netns(ipvs, protos[i]);
+ if (ret < 0)
+ goto cleanup;
+ }
+ return 0;
+
+cleanup:
+ ip_vs_protocol_net_cleanup(ipvs);
+ return ret;
+}
+
+void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
+{
+ struct ip_vs_proto_data *pd;
+ int i;
+
+ /* unregister all the ipvs proto data for this netns */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pd = ipvs->proto_data_table[i]) != NULL)
+ unregister_ip_vs_proto_netns(ipvs, pd);
+ }
+}
+
+int __init ip_vs_protocol_init(void)
+{
+ char protocols[64];
+#define REGISTER_PROTOCOL(p) \
+ do { \
+ register_ip_vs_protocol(p); \
+ strcat(protocols, ", "); \
+ strcat(protocols, (p)->name); \
+ } while (0)
+
+ protocols[0] = '\0';
+ protocols[2] = '\0';
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ REGISTER_PROTOCOL(&ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ REGISTER_PROTOCOL(&ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ REGISTER_PROTOCOL(&ip_vs_protocol_esp);
+#endif
+ pr_info("Registered protocols (%s)\n", &protocols[2]);
+
+ return 0;
+}
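
REGISTER_PROTOCOL() above builds the log string as ", TCP, UDP, ..." and pr_info() then skips the leading ", " via &protocols[2]; presetting protocols[2] = '\0' keeps that print safe even if no protocol is compiled in. The same idiom as a standalone sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char names[64];
	const char *regs[] = { "TCP", "UDP", "SCTP" };
	int i;

	names[0] = '\0';
	names[2] = '\0';	/* &names[2] is "" if nothing is appended */
	for (i = 0; i < 3; i++) {
		strcat(names, ", ");
		strcat(names, regs[i]);
	}
	/* prints "Registered protocols (TCP, UDP, SCTP)" */
	printf("Registered protocols (%s)\n", &names[2]);
	return 0;
}
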
+
+
+void ip_vs_protocol_cleanup(void)
+{
+ struct ip_vs_protocol *pp;
+ int i;
+
+ /* unregister all the ipvs protocols */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pp = ip_vs_proto_table[i]) != NULL)
+ unregister_ip_vs_protocol(pp);
+ }
+}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
new file mode 100644
index 000000000..89602c16f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS
+ *
+ * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
+ * Wensong Zhang <wensong@linuxvirtualserver.org>
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+ __u8 icookie[8];
+ __u8 rcookie[8];
+ __u8 np;
+ __u8 version;
+ __u8 xchgtype;
+ __u8 flags;
+ __u32 msgid;
+ __u32 length;
+};
+
+*/
+
+#define PORT_ISAKMP 500
+
+static void
+ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af,
+ const struct ip_vs_iphdr *iph,
+ struct ip_vs_conn_param *p)
+{
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
+ &iph->saddr, htons(PORT_ISAKMP),
+ &iph->daddr, htons(PORT_ISAKMP), p);
+ else
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
+ &iph->daddr, htons(PORT_ISAKMP),
+ &iph->saddr, htons(PORT_ISAKMP), p);
+}
+
+static struct ip_vs_conn *
+ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_conn *cp;
+ struct ip_vs_conn_param p;
+
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
+ cp = ip_vs_conn_in_get(&p);
+ if (!cp) {
+ /*
+ * We are not sure if the packet is from our
+ * service, so our conn_schedule hook should return NF_ACCEPT
+ */
+ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+ "%s%s %s->%s\n",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
+ ip_vs_proto_get(iph->protocol)->name,
+ IP_VS_DBG_ADDR(af, &iph->saddr),
+ IP_VS_DBG_ADDR(af, &iph->daddr));
+ }
+
+ return cp;
+}
+
+
+static struct ip_vs_conn *
+ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_conn *cp;
+ struct ip_vs_conn_param p;
+
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
+ cp = ip_vs_conn_out_get(&p);
+ if (!cp) {
+ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+ "%s%s %s->%s\n",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
+ ip_vs_proto_get(iph->protocol)->name,
+ IP_VS_DBG_ADDR(af, &iph->saddr),
+ IP_VS_DBG_ADDR(af, &iph->daddr));
+ }
+
+ return cp;
+}
+
+
+static int
+ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+	/*
+	 * AH/ESP is handled only as related traffic;
+	 * pass the packet to the IP stack.
+	 */
+ *verdict = NF_ACCEPT;
+ return 0;
+}
+
+#ifdef CONFIG_IP_VS_PROTO_AH
+struct ip_vs_protocol ip_vs_protocol_ah = {
+ .name = "AH",
+ .protocol = IPPROTO_AH,
+ .num_states = 1,
+ .dont_defrag = 1,
+ .init = NULL,
+ .exit = NULL,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
+ .snat_handler = NULL,
+ .dnat_handler = NULL,
+ .state_transition = NULL,
+ .register_app = NULL,
+ .unregister_app = NULL,
+ .app_conn_bind = NULL,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL, /* ISAKMP */
+};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+ .name = "ESP",
+ .protocol = IPPROTO_ESP,
+ .num_states = 1,
+ .dont_defrag = 1,
+ .init = NULL,
+ .exit = NULL,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
+ .snat_handler = NULL,
+ .dnat_handler = NULL,
+ .state_transition = NULL,
+ .register_app = NULL,
+ .unregister_app = NULL,
+ .app_conn_bind = NULL,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL, /* ISAKMP */
+};
+#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
new file mode 100644
index 000000000..a0921adc3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/sctp/checksum.h>
+#include <net/ip_vs.h>
+
+static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
+sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_service *svc;
+ struct sctp_chunkhdr _schunkh, *sch;
+ struct sctphdr *sh, _sctph;
+ __be16 _ports[2], *ports = NULL;
+
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (sh) {
+ sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
+ sizeof(_schunkh), &_schunkh);
+ if (sch) {
+ if (sch->type == SCTP_CID_ABORT ||
+ !(sysctl_sloppy_sctp(ipvs) ||
+ sch->type == SCTP_CID_INIT))
+ return 1;
+ ports = &sh->source;
+ }
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
+ *verdict = NF_DROP;
+ return 0;
+ }
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+ if (svc) {
+ int ignored;
+
+ if (ip_vs_todrop(ipvs)) {
+ /*
+			 * It seems that we are heavily loaded.
+ * We have to drop this packet :(
+ */
+ *verdict = NF_DROP;
+ return 0;
+ }
+ /*
+ * Let the virtual server select a real server for the
+ * incoming connection, and create a connection entry.
+ */
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
+ else
+ *verdict = NF_DROP;
+ return 0;
+ }
+ }
+ /* NF_ACCEPT */
+ return 1;
+}
+
+static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
+ unsigned int sctphoff)
+{
+ sctph->checksum = sctp_compute_cksum(skb, sctphoff);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+static int
+sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct sctphdr *sctph;
+ unsigned int sctphoff = iph->len;
+ bool payload_csum = false;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!sctp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /* Call application helper if needed */
+ ret = ip_vs_app_pkt_out(cp, skb, iph);
+ if (ret == 0)
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 2)
+ payload_csum = true;
+ }
+
+ sctph = (void *) skb_network_header(skb) + sctphoff;
+
+ /* Only update csum if we really have to */
+ if (sctph->source != cp->vport || payload_csum ||
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ sctph->source = cp->vport;
+ sctp_nat_csum(skb, sctph, sctphoff);
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ return 1;
+}
+
+static int
+sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct sctphdr *sctph;
+ unsigned int sctphoff = iph->len;
+ bool payload_csum = false;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!sctp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /* Call application helper if needed */
+ ret = ip_vs_app_pkt_in(cp, skb, iph);
+ if (ret == 0)
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 2)
+ payload_csum = true;
+ }
+
+ sctph = (void *) skb_network_header(skb) + sctphoff;
+
+ /* Only update csum if we really have to */
+ if (sctph->dest != cp->dport || payload_csum ||
+ (skb->ip_summed == CHECKSUM_PARTIAL &&
+ !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
+ sctph->dest = cp->dport;
+ sctp_nat_csum(skb, sctph, sctphoff);
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ return 1;
+}
+
+static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ unsigned int sctphoff;
+ struct sctphdr *sh;
+ __le32 cmp, val;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ sctphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ sctphoff = ip_hdrlen(skb);
+
+ sh = (struct sctphdr *)(skb->data + sctphoff);
+ cmp = sh->checksum;
+ val = sctp_compute_cksum(skb, sctphoff);
+
+ if (val != cmp) {
+ /* CRC failure, dump it. */
+ IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ return 1;
+}
+
+enum ipvs_sctp_event_t {
+ IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */
+ IP_VS_SCTP_INIT,
+ IP_VS_SCTP_INIT_ACK,
+ IP_VS_SCTP_COOKIE_ECHO,
+ IP_VS_SCTP_COOKIE_ACK,
+ IP_VS_SCTP_SHUTDOWN,
+ IP_VS_SCTP_SHUTDOWN_ACK,
+ IP_VS_SCTP_SHUTDOWN_COMPLETE,
+ IP_VS_SCTP_ERROR,
+ IP_VS_SCTP_ABORT,
+ IP_VS_SCTP_EVENT_LAST
+};
+
+/* RFC 2960, 3.2 Chunk Field Descriptions */
+static __u8 sctp_events[] = {
+ [SCTP_CID_DATA] = IP_VS_SCTP_DATA,
+ [SCTP_CID_INIT] = IP_VS_SCTP_INIT,
+ [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
+ [SCTP_CID_SACK] = IP_VS_SCTP_DATA,
+ [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
+ [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
+ [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
+ [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
+ [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
+ [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
+ [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
+ [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
+ [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
+ [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
+ [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
+};
+
+/* SCTP States:
+ * See RFC 2960, 4. SCTP Association State Diagram
+ *
+ * New states (not in diagram):
+ * - INIT1 state: use shorter timeout for dropped INIT packets
+ * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
+ * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
+ *
+ * The states are as seen in the real server. In the diagram, INIT1, INIT,
+ * COOKIE_SENT and COOKIE_REPLIED processing happens in the CLOSED state.
+ *
+ * States as per packets from client (C) and server (S):
+ *
+ * Setup of client connection:
+ * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
+ *
+ * Setup of server connection:
+ * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
+ * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
+ * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
+ */
+
+#define sNO IP_VS_SCTP_S_NONE
+#define sI1 IP_VS_SCTP_S_INIT1
+#define sIN IP_VS_SCTP_S_INIT
+#define sCS IP_VS_SCTP_S_COOKIE_SENT
+#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
+#define sCW IP_VS_SCTP_S_COOKIE_WAIT
+#define sCO IP_VS_SCTP_S_COOKIE
+#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
+#define sES IP_VS_SCTP_S_ESTABLISHED
+#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
+#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
+#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
+#define sRJ IP_VS_SCTP_S_REJECTED
+#define sCL IP_VS_SCTP_S_CLOSED
+
+static const __u8 sctp_states
+ [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
+ { /* INPUT */
+/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+ },
+ { /* OUTPUT */
+/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
+/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
+/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+ },
+ { /* INPUT-ONLY */
+/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
+/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
+/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
+/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
+/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
+/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
+/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
+/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+ },
+};
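
A state transition is a pure table lookup, next = sctp_states[direction][event][state]; for example, in the OUTPUT block above an INIT chunk ('i' row) seen while in sNO yields sCW (COOKIE_WAIT). The indexing shape as a toy sketch (the table contents here are invented, not the real state machine):

#include <stdio.h>

enum { DIR_INPUT, DIR_OUTPUT, DIR_LAST };
enum { EV_DATA, EV_INIT, EV_LAST };
enum { S_NONE, S_COOKIE_WAIT, S_ESTABLISHED, S_LAST };

/* toy transition table with the same [dir][event][state] layout */
static const int toy_states[DIR_LAST][EV_LAST][S_LAST] = {
	[DIR_INPUT] = {
		[EV_DATA] = { S_ESTABLISHED, S_COOKIE_WAIT, S_ESTABLISHED },
		[EV_INIT] = { S_NONE,        S_COOKIE_WAIT, S_ESTABLISHED },
	},
	[DIR_OUTPUT] = {
		[EV_DATA] = { S_ESTABLISHED, S_COOKIE_WAIT, S_ESTABLISHED },
		[EV_INIT] = { S_COOKIE_WAIT, S_COOKIE_WAIT, S_ESTABLISHED },
	},
};

int main(void)
{
	int next = toy_states[DIR_OUTPUT][EV_INIT][S_NONE];

	printf("OUTPUT + INIT in NONE -> state %d (COOKIE_WAIT)\n", next);
	return 0;
}
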
+
+#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ)
+
+/* Timeout table[state] */
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+ [IP_VS_SCTP_S_NONE] = 2 * HZ,
+ [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ,
+ [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
+ [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ,
+ [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO,
+ [IP_VS_SCTP_S_LAST] = 2 * HZ,
+};
+
+static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
+ [IP_VS_SCTP_S_NONE] = "NONE",
+ [IP_VS_SCTP_S_INIT1] = "INIT1",
+ [IP_VS_SCTP_S_INIT] = "INIT",
+ [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
+ [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
+ [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
+ [IP_VS_SCTP_S_COOKIE] = "COOKIE",
+ [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
+ [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
+ [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
+ [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
+ [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
+ [IP_VS_SCTP_S_REJECTED] = "REJECTED",
+ [IP_VS_SCTP_S_CLOSED] = "CLOSED",
+ [IP_VS_SCTP_S_LAST] = "BUG!",
+};
+
+
+static const char *sctp_state_name(int state)
+{
+ if (state >= IP_VS_SCTP_S_LAST)
+ return "ERR!";
+ if (sctp_state_name_table[state])
+ return sctp_state_name_table[state];
+ return "?";
+}
+
+static inline void
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
+ int direction, const struct sk_buff *skb)
+{
+ struct sctp_chunkhdr _sctpch, *sch;
+ unsigned char chunk_type;
+ int event, next_state;
+ int ihl, cofs;
+
+#ifdef CONFIG_IP_VS_IPV6
+ ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+ ihl = ip_hdrlen(skb);
+#endif
+
+ cofs = ihl + sizeof(struct sctphdr);
+ sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
+ if (sch == NULL)
+ return;
+
+ chunk_type = sch->type;
+ /*
+ * Section 3: Multiple chunks can be bundled into one SCTP packet
+ * up to the MTU size, except for the INIT, INIT ACK, and
+ * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
+ * any other chunk in a packet.
+ *
+ * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
+ * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
+ * bundled with an ABORT, but they MUST be placed before the ABORT
+ * in the SCTP packet or they will be ignored by the receiver.
+ */
+ if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
+ (sch->type == SCTP_CID_COOKIE_ACK)) {
+ int clen = ntohs(sch->length);
+
+ if (clen >= sizeof(_sctpch)) {
+ sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
+ sizeof(_sctpch), &_sctpch);
+ if (sch && sch->type == SCTP_CID_ABORT)
+ chunk_type = sch->type;
+ }
+ }
+
+ event = (chunk_type < sizeof(sctp_events)) ?
+ sctp_events[chunk_type] : IP_VS_SCTP_DATA;
+
+ /* Update direction to INPUT_ONLY if necessary
+ * or delete NO_OUTPUT flag if output packet detected
+ */
+ if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+ if (direction == IP_VS_DIR_OUTPUT)
+ cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+ else
+ direction = IP_VS_DIR_INPUT_ONLY;
+ }
+
+ next_state = sctp_states[direction][event][cp->state];
+
+ if (next_state != cp->state) {
+ struct ip_vs_dest *dest = cp->dest;
+
+ IP_VS_DBG_BUF(8, "%s %s %s:%d->"
+ "%s:%d state: %s->%s conn->refcnt:%d\n",
+ pd->pp->name,
+ ((direction == IP_VS_DIR_OUTPUT) ?
+ "output " : "input "),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
+ ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ sctp_state_name(cp->state),
+ sctp_state_name(next_state),
+ refcount_read(&cp->refcnt));
+ if (dest) {
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
+ if (next_state == IP_VS_SCTP_S_ESTABLISHED)
+ ip_vs_control_assure_ct(cp);
+ }
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = next_state];
+ else /* What to do ? */
+ cp->timeout = sctp_timeouts[cp->state = next_state];
+}
+
+static void
+sctp_state_transition(struct ip_vs_conn *cp, int direction,
+ const struct sk_buff *skb, struct ip_vs_proto_data *pd)
+{
+ spin_lock_bh(&cp->lock);
+ set_sctp_state(pd, cp, direction, skb);
+ spin_unlock_bh(&cp->lock);
+}
+
+static inline __u16 sctp_app_hashkey(__be16 port)
+{
+ return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
+ & SCTP_APP_TAB_MASK;
+}
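
The hash folds the high bits of the network-order port into the low bits before masking. A sketch assuming a tab-bits value of 4 in place of SCTP_APP_TAB_BITS (the real value comes from the kernel headers):

#include <stdio.h>
#include <arpa/inet.h>	/* htons */

#define APP_TAB_BITS 4	/* assumed stand-in for SCTP_APP_TAB_BITS */
#define APP_TAB_SIZE (1 << APP_TAB_BITS)
#define APP_TAB_MASK (APP_TAB_SIZE - 1)

static unsigned short app_hashkey(unsigned short port_be)
{
	return ((port_be >> APP_TAB_BITS) ^ port_be) & APP_TAB_MASK;
}

int main(void)
{
	unsigned short ports[] = { 21, 5060, 9899 };
	int i;

	/* ports hashed in network byte order, like the kernel's __be16 */
	for (i = 0; i < 3; i++)
		printf("port %5u -> bucket %2u\n", (unsigned)ports[i],
		       (unsigned)app_hashkey(htons(ports[i])));
	return 0;
}
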
+
+static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_app *i;
+ __u16 hash;
+ __be16 port = inc->port;
+ int ret = 0;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
+
+ hash = sctp_app_hashkey(port);
+
+ list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
+ if (i->port == port) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
+ atomic_inc(&pd->appcnt);
+out:
+
+ return ret;
+}
+
+static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
+
+ atomic_dec(&pd->appcnt);
+ list_del_rcu(&inc->p_list);
+}
+
+static int sctp_app_conn_bind(struct ip_vs_conn *cp)
+{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ int hash;
+ struct ip_vs_app *inc;
+ int result = 0;
+
+ /* Default binding: bind app only for NAT */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ return 0;
+ /* Lookup application incarnations and bind the right one */
+ hash = sctp_app_hashkey(cp->vport);
+
+ list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
+ if (inc->port == cp->vport) {
+ if (unlikely(!ip_vs_app_inc_get(inc)))
+ break;
+
+ IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+ cp->app = inc;
+ if (inc->init_conn)
+ result = inc->init_conn(inc, cp);
+ break;
+ }
+ }
+
+ return result;
+}
+
+/* ---------------------------------------------
+ * timeouts are netns related now.
+ * ---------------------------------------------
+ */
+static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+ sizeof(sctp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
+ return 0;
+}
+
+static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ kfree(pd->timeout_table);
+}
+
+struct ip_vs_protocol ip_vs_protocol_sctp = {
+ .name = "SCTP",
+ .protocol = IPPROTO_SCTP,
+ .num_states = IP_VS_SCTP_S_LAST,
+ .dont_defrag = 0,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_sctp_init,
+ .exit_netns = __ip_vs_sctp_exit,
+ .register_app = sctp_register_app,
+ .unregister_app = sctp_unregister_app,
+ .conn_schedule = sctp_conn_schedule,
+ .conn_in_get = ip_vs_conn_in_get_proto,
+ .conn_out_get = ip_vs_conn_out_get_proto,
+ .snat_handler = sctp_snat_handler,
+ .dnat_handler = sctp_dnat_handler,
+ .state_name = sctp_state_name,
+ .state_transition = sctp_state_transition,
+ .app_conn_bind = sctp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL,
+};
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
new file mode 100644
index 000000000..7da51390c
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -0,0 +1,746 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
+ * tcp_timeouts table has copy per netns in a hash table per
+ * protocol ip_vs_proto_data and is handled by netns
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h> /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/indirect_call_wrapper.h>
+
+#include <net/ip_vs.h>
+
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
+tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_service *svc;
+ struct tcphdr _tcph, *th;
+ __be16 _ports[2], *ports = NULL;
+
+ /* In the event of icmp, we're only guaranteed to have the first 8
+ * bytes of the transport header, so we only check the rest of the
+ * TCP packet for non-ICMP packets
+ */
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th) {
+ if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
+ return 1;
+ ports = &th->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
+ *verdict = NF_DROP;
+ return 0;
+ }
+
+ /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
+ if (svc) {
+ int ignored;
+
+ if (ip_vs_todrop(ipvs)) {
+ /*
+ * It seems that we are very loaded.
+ * We have to drop this packet :(
+ */
+ *verdict = NF_DROP;
+ return 0;
+ }
+
+ /*
+ * Let the virtual server select a real server for the
+ * incoming connection, and create a connection entry.
+ */
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
+ else
+ *verdict = NF_DROP;
+ return 0;
+ }
+ }
+ /* NF_ACCEPT */
+ return 1;
+}
+
+
+static inline void
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
+ __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcph->check =
+ csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(tcph->check))));
+ else
+#endif
+ tcph->check =
+ csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(tcph->check))));
+}
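+
+/* Illustrative userspace sketch (guarded out, not kernel code): the same
+ * incremental one's-complement update that tcp_fast_csum_update() does,
+ * applied to a single 16-bit field change, in the spirit of RFC 1624
+ * (HC' = ~(~HC + ~m + m')). All values below are hypothetical.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+static uint16_t csum_update16(uint16_t check, uint16_t oldv, uint16_t newv)
+{
+	uint32_t sum = (uint16_t)~check;
+
+	sum += (uint16_t)~oldv;			/* remove the old value */
+	sum += newv;				/* add the new value */
+	sum = (sum & 0xffff) + (sum >> 16);	/* fold end-around carry */
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (uint16_t)~sum;
+}
+
+int main(void)
+{
+	/* e.g. a NAT rewrite of source port 8080 -> 80 */
+	printf("updated csum: 0x%04x\n", csum_update16(0x1c46, 8080, 80));
+	return 0;
+}
+#endif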
+
+
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
+ __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcph->check =
+ ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldlen, newlen,
+ csum_unfold(tcph->check))));
+ else
+#endif
+ tcph->check =
+ ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+ ip_vs_check_diff2(oldlen, newlen,
+ csum_unfold(tcph->check))));
+}
+
+
+INDIRECT_CALLABLE_SCOPE int
+tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct tcphdr *tcph;
+ unsigned int tcphoff = iph->len;
+ bool payload_csum = false;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+ oldlen = skb->len - tcphoff;
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!tcp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /* Call application helper if needed */
+ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 1)
+ oldlen = skb->len - tcphoff;
+ else
+ payload_csum = true;
+ }
+
+ tcph = (void *)skb_network_header(skb) + tcphoff;
+ tcph->source = cp->vport;
+
+ /* Adjust TCP checksums */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+ htons(oldlen),
+ htons(skb->len - tcphoff));
+ } else if (!payload_csum) {
+ /* Only port and addr are changed, do fast csum update */
+ tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+ cp->dport, cp->vport);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = cp->app ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+ } else {
+ /* full checksum calculation */
+ tcph->check = 0;
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+ &cp->caddr.in6,
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+ cp->caddr.ip,
+ skb->len - tcphoff,
+ cp->protocol,
+ skb->csum);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+ pp->name, tcph->check,
+ (char*)&(tcph->check) - (char*)tcph);
+ }
+ return 1;
+}
+
+
+static int
+tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct tcphdr *tcph;
+ unsigned int tcphoff = iph->len;
+ bool payload_csum = false;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+ oldlen = skb->len - tcphoff;
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!tcp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /*
+ * Attempt ip_vs_app call.
+ * It will fix ip_vs_conn and iph ack_seq stuff
+ */
+ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 1)
+ oldlen = skb->len - tcphoff;
+ else
+ payload_csum = true;
+ }
+
+ tcph = (void *)skb_network_header(skb) + tcphoff;
+ tcph->dest = cp->dport;
+
+ /*
+ * Adjust TCP checksums
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+ htons(oldlen),
+ htons(skb->len - tcphoff));
+ } else if (!payload_csum) {
+ /* Only port and addr are changed, do fast csum update */
+ tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+ cp->vport, cp->dport);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = cp->app ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+ } else {
+ /* full checksum calculation */
+ tcph->check = 0;
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+ &cp->daddr.in6,
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+ cp->daddr.ip,
+ skb->len - tcphoff,
+ cp->protocol,
+ skb->csum);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return 1;
+}
+
+
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ tcphoff = ip_hdrlen(skb);
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+ fallthrough;
+ case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ipv6_hdr(skb)->nexthdr,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ } else
+#endif
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ip_hdr(skb)->protocol,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ break;
+ default:
+ /* No need to checksum. */
+ break;
+ }
+
+ return 1;
+}
+
+
+#define TCP_DIR_INPUT 0
+#define TCP_DIR_OUTPUT 4
+#define TCP_DIR_INPUT_ONLY 8
+
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
+ [IP_VS_DIR_INPUT] = TCP_DIR_INPUT,
+ [IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT,
+ [IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY,
+};
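+
+/* Each direction block in the state tables below holds four rows (syn,
+ * fin, ack, rst), which is why the offsets above stride by 4: the lookup
+ * in set_tcp_state() is
+ * tcp_state_table[state_off + state_idx].next_state[cp->state].
+ */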
+
+/*
+ * Timeout table[state]
+ */
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+ [IP_VS_TCP_S_NONE] = 2*HZ,
+ [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
+ [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
+ [IP_VS_TCP_S_SYN_RECV] = 1*60*HZ,
+ [IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ,
+ [IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ,
+ [IP_VS_TCP_S_CLOSE] = 10*HZ,
+ [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
+ [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
+ [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
+ [IP_VS_TCP_S_SYNACK] = 120*HZ,
+ [IP_VS_TCP_S_LAST] = 2*HZ,
+};
+
+static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
+ [IP_VS_TCP_S_NONE] = "NONE",
+ [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
+ [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT",
+ [IP_VS_TCP_S_SYN_RECV] = "SYN_RECV",
+ [IP_VS_TCP_S_FIN_WAIT] = "FIN_WAIT",
+ [IP_VS_TCP_S_TIME_WAIT] = "TIME_WAIT",
+ [IP_VS_TCP_S_CLOSE] = "CLOSE",
+ [IP_VS_TCP_S_CLOSE_WAIT] = "CLOSE_WAIT",
+ [IP_VS_TCP_S_LAST_ACK] = "LAST_ACK",
+ [IP_VS_TCP_S_LISTEN] = "LISTEN",
+ [IP_VS_TCP_S_SYNACK] = "SYNACK",
+ [IP_VS_TCP_S_LAST] = "BUG!",
+};
+
+static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = {
+ [IP_VS_TCP_S_NONE] = false,
+ [IP_VS_TCP_S_ESTABLISHED] = true,
+ [IP_VS_TCP_S_SYN_SENT] = true,
+ [IP_VS_TCP_S_SYN_RECV] = true,
+ [IP_VS_TCP_S_FIN_WAIT] = false,
+ [IP_VS_TCP_S_TIME_WAIT] = false,
+ [IP_VS_TCP_S_CLOSE] = false,
+ [IP_VS_TCP_S_CLOSE_WAIT] = false,
+ [IP_VS_TCP_S_LAST_ACK] = false,
+ [IP_VS_TCP_S_LISTEN] = false,
+ [IP_VS_TCP_S_SYNACK] = true,
+};
+
+#define sNO IP_VS_TCP_S_NONE
+#define sES IP_VS_TCP_S_ESTABLISHED
+#define sSS IP_VS_TCP_S_SYN_SENT
+#define sSR IP_VS_TCP_S_SYN_RECV
+#define sFW IP_VS_TCP_S_FIN_WAIT
+#define sTW IP_VS_TCP_S_TIME_WAIT
+#define sCL IP_VS_TCP_S_CLOSE
+#define sCW IP_VS_TCP_S_CLOSE_WAIT
+#define sLA IP_VS_TCP_S_LAST_ACK
+#define sLI IP_VS_TCP_S_LISTEN
+#define sSA IP_VS_TCP_S_SYNACK
+
+struct tcp_states_t {
+ int next_state[IP_VS_TCP_S_LAST];
+};
+
+static const char * tcp_state_name(int state)
+{
+ if (state >= IP_VS_TCP_S_LAST)
+ return "ERR!";
+ return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
+}
+
+static bool tcp_state_active(int state)
+{
+ if (state >= IP_VS_TCP_S_LAST)
+ return false;
+ return tcp_state_active_table[state];
+}
+
+static struct tcp_states_t tcp_states[] = {
+/* INPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/* OUTPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/* INPUT-ONLY */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t tcp_states_dos[] = {
+/* INPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/* OUTPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/* INPUT-ONLY */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
+{
+ int on = (flags & 1); /* secure_tcp */
+
+ /*
+ ** FIXME: change secure_tcp to independent sysctl var
+ ** or make it per-service or per-app because it is valid
+ ** for most if not for all of the applications. Something
+ ** like "capabilities" (flags) for each object.
+ */
+ pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
+}
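+
+/* With secure_tcp on, the "dos" table is stricter about promoting
+ * connections to ESTABLISHED (e.g. in its INPUT ack row, SYN_RECV stays
+ * in SYN_RECV instead of moving to ESTABLISHED), which limits how many
+ * active connection slots a spoofed SYN flood can occupy.
+ */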
+
+static inline int tcp_state_idx(struct tcphdr *th)
+{
+ if (th->rst)
+ return 3;
+ if (th->syn)
+ return 0;
+ if (th->fin)
+ return 1;
+ if (th->ack)
+ return 2;
+ return -1;
+}
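+
+/* Flag precedence above is RST, then SYN, FIN, ACK: a RST always maps
+ * to the rst row even when combined with other flags, and a segment
+ * with none of these flags yields -1 (no state change).
+ */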
+
+static inline void
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
+ int direction, struct tcphdr *th)
+{
+ int state_idx;
+ int new_state = IP_VS_TCP_S_CLOSE;
+ int state_off = tcp_state_off[direction];
+
+ /*
+ * Update state offset to INPUT_ONLY if necessary
+ * or delete NO_OUTPUT flag if output packet detected
+ */
+ if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+ if (state_off == TCP_DIR_OUTPUT)
+ cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+ else
+ state_off = TCP_DIR_INPUT_ONLY;
+ }
+
+ if ((state_idx = tcp_state_idx(th)) < 0) {
+ IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
+ goto tcp_state_out;
+ }
+
+ new_state =
+ pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
+
+ tcp_state_out:
+ if (new_state != cp->state) {
+ struct ip_vs_dest *dest = cp->dest;
+
+ IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] c:%s:%d v:%s:%d "
+ "d:%s:%d state: %s->%s conn->refcnt:%d\n",
+ pd->pp->name,
+ ((state_off == TCP_DIR_OUTPUT) ?
+ "output " : "input "),
+ th->syn ? 'S' : '.',
+ th->fin ? 'F' : '.',
+ th->ack ? 'A' : '.',
+ th->rst ? 'R' : '.',
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
+ ntohs(cp->dport),
+ tcp_state_name(cp->state),
+ tcp_state_name(new_state),
+ refcount_read(&cp->refcnt));
+
+ if (dest) {
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ !tcp_state_active(new_state)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ tcp_state_active(new_state)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
+ if (new_state == IP_VS_TCP_S_ESTABLISHED)
+ ip_vs_control_assure_ct(cp);
+ }
+
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = new_state];
+ else /* What to do ? */
+ cp->timeout = tcp_timeouts[cp->state = new_state];
+}
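+
+/* Example walk through the normal table: an incoming ACK (state_idx 2,
+ * offset TCP_DIR_INPUT) on a connection in SYN_RECV selects the INPUT
+ * ack row and moves the connection to ESTABLISHED, at which point the
+ * dest's activeconns counter is credited above.
+ */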
+
+/*
+ * Handle state transitions
+ */
+static void
+tcp_state_transition(struct ip_vs_conn *cp, int direction,
+ const struct sk_buff *skb,
+ struct ip_vs_proto_data *pd)
+{
+ struct tcphdr _tcph, *th;
+
+#ifdef CONFIG_IP_VS_IPV6
+ int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+ int ihl = ip_hdrlen(skb);
+#endif
+
+ th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return;
+
+ spin_lock_bh(&cp->lock);
+ set_tcp_state(pd, cp, direction, th);
+ spin_unlock_bh(&cp->lock);
+}
+
+static inline __u16 tcp_app_hashkey(__be16 port)
+{
+ return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
+ & TCP_APP_TAB_MASK;
+}
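+
+/* Illustrative userspace sketch (guarded out, not kernel code) of the
+ * fold above: XOR the high bits of the port onto the low bits and mask
+ * to the table size. TCP_APP_TAB_BITS is assumed to be 4 here (a
+ * 16-bucket table); see ip_vs.h for the real values. The SCTP and UDP
+ * variants in this series use the same scheme.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+#define APP_TAB_BITS	4
+#define APP_TAB_MASK	((1 << APP_TAB_BITS) - 1)
+
+static uint16_t app_hashkey(uint16_t port)	/* host order for the demo */
+{
+	return ((port >> APP_TAB_BITS) ^ port) & APP_TAB_MASK;
+}
+
+int main(void)
+{
+	printf("port 21 -> bucket %u\n", app_hashkey(21));	/* FTP */
+	return 0;
+}
+#endif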
+
+
+static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_app *i;
+ __u16 hash;
+ __be16 port = inc->port;
+ int ret = 0;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
+
+ hash = tcp_app_hashkey(port);
+
+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
+ if (i->port == port) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
+ atomic_inc(&pd->appcnt);
+
+ out:
+ return ret;
+}
+
+
+static void
+tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
+
+ atomic_dec(&pd->appcnt);
+ list_del_rcu(&inc->p_list);
+}
+
+
+static int
+tcp_app_conn_bind(struct ip_vs_conn *cp)
+{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ int hash;
+ struct ip_vs_app *inc;
+ int result = 0;
+
+ /* Default binding: bind app only for NAT */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ return 0;
+
+ /* Lookup application incarnations and bind the right one */
+ hash = tcp_app_hashkey(cp->vport);
+
+ list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
+ if (inc->port == cp->vport) {
+ if (unlikely(!ip_vs_app_inc_get(inc)))
+ break;
+
+ IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+
+ cp->app = inc;
+ if (inc->init_conn)
+ result = inc->init_conn(inc, cp);
+ break;
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
+ */
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
+
+ spin_lock_bh(&cp->lock);
+ cp->state = IP_VS_TCP_S_LISTEN;
+ cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+ : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
+ spin_unlock_bh(&cp->lock);
+}
+
+/* ---------------------------------------------
+ * timeouts are netns related now.
+ * ---------------------------------------------
+ */
+static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+ sizeof(tcp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
+ pd->tcp_state_table = tcp_states;
+ return 0;
+}
+
+static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ kfree(pd->timeout_table);
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_tcp = {
+ .name = "TCP",
+ .protocol = IPPROTO_TCP,
+ .num_states = IP_VS_TCP_S_LAST,
+ .dont_defrag = 0,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_tcp_init,
+ .exit_netns = __ip_vs_tcp_exit,
+ .register_app = tcp_register_app,
+ .unregister_app = tcp_unregister_app,
+ .conn_schedule = tcp_conn_schedule,
+ .conn_in_get = ip_vs_conn_in_get_proto,
+ .conn_out_get = ip_vs_conn_out_get_proto,
+ .snat_handler = tcp_snat_handler,
+ .dnat_handler = tcp_dnat_handler,
+ .state_name = tcp_state_name,
+ .state_transition = tcp_state_transition,
+ .app_conn_bind = tcp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = tcp_timeout_change,
+};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
new file mode 100644
index 000000000..68260d91c
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_proto_udp.c: UDP load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
+#include <linux/indirect_call_wrapper.h>
+
+#include <net/ip_vs.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
+udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_service *svc;
+ struct udphdr _udph, *uh;
+ __be16 _ports[2], *ports = NULL;
+
+ if (likely(!ip_vs_iph_icmp(iph))) {
+		/* IPv6 fragments, only the first fragment will hit this */
+ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (uh)
+ ports = &uh->source;
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
+ *verdict = NF_DROP;
+ return 0;
+ }
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
+ if (svc) {
+ int ignored;
+
+ if (ip_vs_todrop(ipvs)) {
+ /*
+ * It seems that we are very loaded.
+ * We have to drop this packet :(
+ */
+ *verdict = NF_DROP;
+ return 0;
+ }
+
+ /*
+ * Let the virtual server select a real server for the
+ * incoming connection, and create a connection entry.
+ */
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
+ else
+ *verdict = NF_DROP;
+ return 0;
+ }
+ }
+ /* NF_ACCEPT */
+ return 1;
+}
+
+
+static inline void
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
+ __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ uhdr->check =
+ csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(uhdr->check))));
+ else
+#endif
+ uhdr->check =
+ csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(uhdr->check))));
+ if (!uhdr->check)
+ uhdr->check = CSUM_MANGLED_0;
+}
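+
+/* In UDP a transmitted checksum of 0 means "no checksum", so a value
+ * that folds to 0 must be sent as all-ones (CSUM_MANGLED_0), which is
+ * the equivalent one's-complement representation.
+ */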
+
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
+ __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ uhdr->check =
+ ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldlen, newlen,
+ csum_unfold(uhdr->check))));
+ else
+#endif
+ uhdr->check =
+ ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+ ip_vs_check_diff2(oldlen, newlen,
+ csum_unfold(uhdr->check))));
+}
+
+
+INDIRECT_CALLABLE_SCOPE int
+udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct udphdr *udph;
+ unsigned int udphoff = iph->len;
+ bool payload_csum = false;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+ oldlen = skb->len - udphoff;
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!udp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /*
+ * Call application helper if needed
+ */
+ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 1)
+ oldlen = skb->len - udphoff;
+ else
+ payload_csum = true;
+ }
+
+ udph = (void *)skb_network_header(skb) + udphoff;
+ udph->source = cp->vport;
+
+ /*
+ * Adjust UDP checksums
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+ htons(oldlen),
+ htons(skb->len - udphoff));
+ } else if (!payload_csum && (udph->check != 0)) {
+ /* Only port and addr are changed, do fast csum update */
+ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+ cp->dport, cp->vport);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = cp->app ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+ } else {
+ /* full checksum calculation */
+ udph->check = 0;
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+ &cp->caddr.in6,
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+ cp->caddr.ip,
+ skb->len - udphoff,
+ cp->protocol,
+ skb->csum);
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+ pp->name, udph->check,
+ (char*)&(udph->check) - (char*)udph);
+ }
+ return 1;
+}
+
+
+static int
+udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+{
+ struct udphdr *udph;
+ unsigned int udphoff = iph->len;
+ bool payload_csum = false;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
+#endif
+ oldlen = skb->len - udphoff;
+
+ /* csum_check requires unshared skb */
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ int ret;
+
+ /* Some checks before mangling */
+ if (!udp_csum_check(cp->af, skb, pp))
+ return 0;
+
+ /*
+ * Attempt ip_vs_app call.
+ * It will fix ip_vs_conn
+ */
+ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
+ return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 1)
+ oldlen = skb->len - udphoff;
+ else
+ payload_csum = true;
+ }
+
+ udph = (void *)skb_network_header(skb) + udphoff;
+ udph->dest = cp->dport;
+
+ /*
+ * Adjust UDP checksums
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+ htons(oldlen),
+ htons(skb->len - udphoff));
+ } else if (!payload_csum && (udph->check != 0)) {
+ /* Only port and addr are changed, do fast csum update */
+ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+ cp->vport, cp->dport);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = cp->app ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+ } else {
+ /* full checksum calculation */
+ udph->check = 0;
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udph->check = csum_ipv6_magic(&cp->caddr.in6,
+ &cp->daddr.in6,
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ udph->check = csum_tcpudp_magic(cp->caddr.ip,
+ cp->daddr.ip,
+ skb->len - udphoff,
+ cp->protocol,
+ skb->csum);
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return 1;
+}
+
+
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ struct udphdr _udph, *uh;
+ unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ udphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ udphoff = ip_hdrlen(skb);
+
+ uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+ if (uh == NULL)
+ return 0;
+
+ if (uh->check != 0) {
+ switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ skb->csum = skb_checksum(skb, udphoff,
+ skb->len - udphoff, 0);
+ fallthrough;
+ case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - udphoff,
+ ipv6_hdr(skb)->nexthdr,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ } else
+#endif
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - udphoff,
+ ip_hdr(skb)->protocol,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ break;
+ default:
+ /* No need to checksum. */
+ break;
+ }
+ }
+ return 1;
+}
+
+static inline __u16 udp_app_hashkey(__be16 port)
+{
+ return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
+ & UDP_APP_TAB_MASK;
+}
+
+
+static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_app *i;
+ __u16 hash;
+ __be16 port = inc->port;
+ int ret = 0;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
+
+ hash = udp_app_hashkey(port);
+
+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
+ if (i->port == port) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
+ atomic_inc(&pd->appcnt);
+
+ out:
+ return ret;
+}
+
+
+static void
+udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
+{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
+
+ atomic_dec(&pd->appcnt);
+ list_del_rcu(&inc->p_list);
+}
+
+
+static int udp_app_conn_bind(struct ip_vs_conn *cp)
+{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ int hash;
+ struct ip_vs_app *inc;
+ int result = 0;
+
+ /* Default binding: bind app only for NAT */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ return 0;
+
+ /* Lookup application incarnations and bind the right one */
+ hash = udp_app_hashkey(cp->vport);
+
+ list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
+ if (inc->port == cp->vport) {
+ if (unlikely(!ip_vs_app_inc_get(inc)))
+ break;
+
+ IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+
+ cp->app = inc;
+ if (inc->init_conn)
+ result = inc->init_conn(inc, cp);
+ break;
+ }
+ }
+
+ return result;
+}
+
+
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+ [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
+ [IP_VS_UDP_S_LAST] = 2*HZ,
+};
+
+static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
+ [IP_VS_UDP_S_NORMAL] = "UDP",
+ [IP_VS_UDP_S_LAST] = "BUG!",
+};
+
+static const char * udp_state_name(int state)
+{
+ if (state >= IP_VS_UDP_S_LAST)
+ return "ERR!";
+ return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
+}
+
+static void
+udp_state_transition(struct ip_vs_conn *cp, int direction,
+ const struct sk_buff *skb,
+ struct ip_vs_proto_data *pd)
+{
+ if (unlikely(!pd)) {
+ pr_err("UDP no ns data\n");
+ return;
+ }
+
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
+ if (direction == IP_VS_DIR_OUTPUT)
+ ip_vs_control_assure_ct(cp);
+}
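+
+/* UDP is connectionless, so there is a single NORMAL state: every
+ * packet just refreshes the timeout, and seeing an output packet
+ * (two-way traffic) is enough to assure the conntrack entry.
+ */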
+
+static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+ sizeof(udp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
+ return 0;
+}
+
+static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+{
+ kfree(pd->timeout_table);
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_udp = {
+ .name = "UDP",
+ .protocol = IPPROTO_UDP,
+ .num_states = IP_VS_UDP_S_LAST,
+ .dont_defrag = 0,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __udp_init,
+ .exit_netns = __udp_exit,
+ .conn_schedule = udp_conn_schedule,
+ .conn_in_get = ip_vs_conn_in_get_proto,
+ .conn_out_get = ip_vs_conn_out_get_proto,
+ .snat_handler = udp_snat_handler,
+ .dnat_handler = udp_dnat_handler,
+ .state_transition = udp_state_transition,
+ .state_name = udp_state_name,
+ .register_app = udp_register_app,
+ .unregister_app = udp_unregister_app,
+ .app_conn_bind = udp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL,
+};
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
new file mode 100644
index 000000000..38495c6f6
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Round-Robin Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * Fixes/Changes:
+ * Wensong Zhang : changed the ip_vs_rr_schedule to return dest
+ * Julian Anastasov : fixed the NULL pointer access bug in debugging
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_rr_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations;
+ return 0;
+}
+
+
+static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
+{
+ struct list_head *p;
+
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ /* dest is already unlinked, so p->prev is not valid but
+ * p->next is valid, use it to reach previous entry.
+ */
+ if (p == &dest->n_list)
+ svc->sched_data = p->next->prev;
+ spin_unlock_bh(&svc->sched_lock);
+ return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct list_head *p;
+ struct ip_vs_dest *dest, *last;
+ int pass = 0;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ last = dest = list_entry(p, struct ip_vs_dest, n_list);
+
+ do {
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0)
+ /* HIT */
+ goto out;
+ if (dest == last)
+ goto stop;
+ }
+ pass++;
+ /* Previous dest could be unlinked, do not loop forever.
+ * If we stay at head there is no need for 2nd pass.
+ */
+ } while (pass < 2 && p != &svc->destinations);
+
+stop:
+ spin_unlock_bh(&svc->sched_lock);
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+
+ out:
+ svc->sched_data = &dest->n_list;
+ spin_unlock_bh(&svc->sched_lock);
+ IP_VS_DBG_BUF(6, "RR: server %s:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->activeconns),
+ refcount_read(&dest->refcnt), atomic_read(&dest->weight));
+
+ return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+ .name = "rr", /* name */
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+ .init_service = ip_vs_rr_init_svc,
+ .add_dest = NULL,
+ .del_dest = ip_vs_rr_del_dest,
+ .schedule = ip_vs_rr_schedule,
+};
+
+static int __init ip_vs_rr_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+static void __exit ip_vs_rr_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
new file mode 100644
index 000000000..d4903723b
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * Changes:
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(ip_vs_scheduler_err);
+/*
+ * IPVS scheduler list
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* mutex for schedulers */
+static DEFINE_MUTEX(ip_vs_sched_mutex);
+
+
+/*
+ * Bind a service with a scheduler
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+ struct ip_vs_scheduler *scheduler)
+{
+ int ret;
+
+ if (scheduler->init_service) {
+ ret = scheduler->init_service(svc);
+ if (ret) {
+ pr_err("%s(): init error\n", __func__);
+ return ret;
+ }
+ }
+ rcu_assign_pointer(svc->scheduler, scheduler);
+ return 0;
+}
+
+
+/*
+ * Unbind a service with its scheduler
+ */
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
+ struct ip_vs_scheduler *sched)
+{
+ struct ip_vs_scheduler *cur_sched;
+
+ cur_sched = rcu_dereference_protected(svc->scheduler, 1);
+ /* This check proves that old 'sched' was installed */
+ if (!cur_sched)
+ return;
+
+ if (sched->done_service)
+ sched->done_service(svc);
+ /* svc->scheduler can be set to NULL only by caller */
+}
+
+
+/*
+ * Get scheduler in the scheduler list by name
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+ struct ip_vs_scheduler *sched;
+
+ IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
+
+ mutex_lock(&ip_vs_sched_mutex);
+
+ list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		/*
+		 * Test and get the module atomically
+		 */
+		if (sched->module && !try_module_get(sched->module)) {
+			/*
+			 * This scheduler is being deleted
+			 */
+ continue;
+ }
+ if (strcmp(sched_name, sched->name)==0) {
+ /* HIT */
+ mutex_unlock(&ip_vs_sched_mutex);
+ return sched;
+ }
+ module_put(sched->module);
+ }
+
+ mutex_unlock(&ip_vs_sched_mutex);
+ return NULL;
+}
+
+
+/*
+ * Lookup scheduler and try to load it if it doesn't exist
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+ struct ip_vs_scheduler *sched;
+
+ /*
+ * Search for the scheduler by sched_name
+ */
+ sched = ip_vs_sched_getbyname(sched_name);
+
+ /*
+ * If scheduler not found, load the module and search again
+ */
+ if (sched == NULL) {
+ request_module("ip_vs_%s", sched_name);
+ sched = ip_vs_sched_getbyname(sched_name);
+ }
+
+ return sched;
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
+{
+ if (scheduler)
+ module_put(scheduler->module);
+}
+
+/*
+ * Common error output helper for schedulers
+ */
+
+void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
+{
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
+
+ if (svc->fwmark) {
+ IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
+ sched_name, svc->fwmark, svc->fwmark, msg);
+#ifdef CONFIG_IP_VS_IPV6
+ } else if (svc->af == AF_INET6) {
+ IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
+ sched_name, ip_vs_proto_name(svc->protocol),
+ &svc->addr.in6, ntohs(svc->port), msg);
+#endif
+ } else {
+ IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
+ sched_name, ip_vs_proto_name(svc->protocol),
+ &svc->addr.ip, ntohs(svc->port), msg);
+ }
+}
+
+/*
+ * Register a scheduler in the scheduler list
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+ struct ip_vs_scheduler *sched;
+
+ if (!scheduler) {
+ pr_err("%s(): NULL arg\n", __func__);
+ return -EINVAL;
+ }
+
+ if (!scheduler->name) {
+ pr_err("%s(): NULL scheduler_name\n", __func__);
+ return -EINVAL;
+ }
+
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc())
+ return -ENOENT;
+
+ mutex_lock(&ip_vs_sched_mutex);
+
+ if (!list_empty(&scheduler->n_list)) {
+ mutex_unlock(&ip_vs_sched_mutex);
+ ip_vs_use_count_dec();
+ pr_err("%s(): [%s] scheduler already linked\n",
+ __func__, scheduler->name);
+ return -EINVAL;
+ }
+
+ /*
+ * Make sure that the scheduler with this name doesn't exist
+ * in the scheduler list.
+ */
+ list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+ if (strcmp(scheduler->name, sched->name) == 0) {
+ mutex_unlock(&ip_vs_sched_mutex);
+ ip_vs_use_count_dec();
+			pr_err("%s(): [%s] scheduler already exists "
+				"in the system\n", __func__, scheduler->name);
+ return -EINVAL;
+ }
+ }
+ /*
+ * Add it into the d-linked scheduler list
+ */
+ list_add(&scheduler->n_list, &ip_vs_schedulers);
+ mutex_unlock(&ip_vs_sched_mutex);
+
+ pr_info("[%s] scheduler registered.\n", scheduler->name);
+
+ return 0;
+}
+
+
+/*
+ * Unregister a scheduler from the scheduler list
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+ if (!scheduler) {
+ pr_err("%s(): NULL arg\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&ip_vs_sched_mutex);
+ if (list_empty(&scheduler->n_list)) {
+ mutex_unlock(&ip_vs_sched_mutex);
+ pr_err("%s(): [%s] scheduler is not in the list. failed\n",
+ __func__, scheduler->name);
+ return -EINVAL;
+ }
+
+ /*
+ * Remove it from the d-linked scheduler list
+ */
+ list_del(&scheduler->n_list);
+ mutex_unlock(&ip_vs_sched_mutex);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+
+ pr_info("[%s] scheduler unregistered.\n", scheduler->name);
+
+ return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
new file mode 100644
index 000000000..7663288e5
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Shortest Expected Delay scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC when scheduling big jobs on highly heterogeneous systems
+ * (where server weights vary a lot).
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+ /*
+ * We only use the active connection number in the cost
+ * calculation here.
+ */
+ return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ * Shortest Expected Delay scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (server expected overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connections.
+ */
+
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = ip_vs_sed_dest_overhead(least);
+ goto nextstage;
+ }
+ }
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+
+ /*
+ * Find the destination with the least load.
+ */
+ nextstage:
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+ doh = ip_vs_sed_dest_overhead(dest);
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "SED: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
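+
+/* Illustrative sketch (guarded out, not kernel code) of the integer
+ * cross-multiplication used above: loh/lweight > doh/dweight is tested
+ * as loh*dweight > doh*lweight, which avoids floating point and is
+ * valid while all weights are positive. The numbers are hypothetical.
+ */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+	long long loh = 11, lw = 2;	/* current least: 10+1 conns, weight 2 */
+	long long doh = 21, dw = 5;	/* candidate: 20+1 conns, weight 5 */
+
+	/* 11/2 = 5.5 vs 21/5 = 4.2: the candidate has less expected delay */
+	if (loh * dw > doh * lw)
+		printf("switch to candidate\n");
+	return 0;
+}
+#endif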
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+ .name = "sed",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+ .schedule = ip_vs_sed_schedule,
+};
+
+
+static int __init ip_vs_sed_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+static void __exit ip_vs_sed_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
new file mode 100644
index 000000000..c2028e412
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Source Hashing scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * Changes:
+ */
+
+/*
+ * The sh algorithm selects a server by the hash key of the source IP
+ *	address. The pseudo code is as follows:
+ *
+ * n <- servernode[src_ip];
+ * if (n is dead) OR
+ * (n is overloaded) or (n.weight <= 0) then
+ * return NULL;
+ *
+ * return n;
+ *
+ * Note that servernode is a 256-bucket hash table that maps the hash
+ * index derived from the packet source IP address to the current server
+ * array. If the sh scheduler is used in a cache cluster, it is good to
+ * combine it with the cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ * The weight destination attribute can be used to control the
+ * distribution of connections to the destinations in servernode. The
+ * greater the weight, the more connections the destination
+ * will receive.
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
+
+/*
+ * IPVS SH bucket
+ */
+struct ip_vs_sh_bucket {
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
+};
+
+/*
+ * for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS 8
+#endif
+#define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
+#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
+
+struct ip_vs_sh_state {
+ struct rcu_head rcu_head;
+ struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
+};
+
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+ return atomic_read(&dest->weight) <= 0 ||
+ dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+/*
+ * Returns hash value for IPVS SH entry
+ */
+static inline unsigned int
+ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
+ __be16 port, unsigned int offset)
+{
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
+ return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
+ IP_VS_SH_TAB_BITS)) &
+ IP_VS_SH_TAB_MASK;
+}
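+
+/* Illustrative userspace sketch (guarded out, not kernel code) of the
+ * key above: fold the address to 32 bits, add the port, and take the
+ * top IP_VS_SH_TAB_BITS of a multiplicative hash. The golden-ratio
+ * constant below mirrors the kernel's hash_32(); treat it as an
+ * assumption for the demo rather than a contract.
+ */
+#if 0
+#include <stdint.h>
+#include <stdio.h>
+
+#define SH_TAB_BITS	8
+#define SH_TAB_MASK	((1u << SH_TAB_BITS) - 1)
+
+static unsigned int sh_hashkey(uint32_t addr, uint16_t port,
+			       unsigned int offset)
+{
+	uint32_t h = (uint32_t)(port + addr) * 0x61C88647u;
+
+	return (offset + (h >> (32 - SH_TAB_BITS))) & SH_TAB_MASK;
+}
+
+int main(void)
+{
+	/* 192.168.0.1:80, no fallback offset */
+	printf("bucket %u\n", sh_hashkey(0xc0a80001u, 80, 0));
+	return 0;
+}
+#endif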
+
+
+/*
+ * Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+ const union nf_inet_addr *addr, __be16 port)
+{
+ unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+ struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
+
+ return (!dest || is_unavailable(dest)) ? NULL : dest;
+}
+
+
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * The fallback strategy loops around the table starting from a "random"
+ * point (in fact, it is chosen to be the original hash value to make the
+ * algorithm deterministic) to find a new server.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+ const union nf_inet_addr *addr, __be16 port)
+{
+ unsigned int offset, roffset;
+ unsigned int hash, ihash;
+ struct ip_vs_dest *dest;
+
+ /* first try the dest it's supposed to go to */
+ ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+ dest = rcu_dereference(s->buckets[ihash].dest);
+ if (!dest)
+ return NULL;
+ if (!is_unavailable(dest))
+ return dest;
+
+ IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
+
+ /* if the original dest is unavailable, loop around the table
+ * starting from ihash to find a new dest
+ */
+ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
+ roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
+ hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
+ dest = rcu_dereference(s->buckets[hash].dest);
+ if (!dest)
+ break;
+ if (!is_unavailable(dest))
+ return dest;
+ IP_VS_DBG_BUF(6, "SH: selected unavailable "
+ "server %s:%d (offset %d), reselecting",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port), roffset);
+ }
+
+ return NULL;
+}
+
+/*
+ * Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
+{
+ int i;
+ struct ip_vs_sh_bucket *b;
+ struct list_head *p;
+ struct ip_vs_dest *dest;
+ int d_count;
+ bool empty;
+
+ b = &s->buckets[0];
+ p = &svc->destinations;
+ empty = list_empty(p);
+ d_count = 0;
+ for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
+ if (p == &svc->destinations)
+ p = p->next;
+
+ dest = list_entry(p, struct ip_vs_dest, n_list);
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
+
+ IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
+ i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ atomic_read(&dest->weight));
+
+ /* Don't move to next dest until filling weight */
+ if (++d_count >= atomic_read(&dest->weight)) {
+ p = p->next;
+ d_count = 0;
+ }
+
+ }
+ b++;
+ }
+ return 0;
+}
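+
+/* The d_count logic above fills buckets in proportion to weight: e.g.
+ * dests A (weight 2) and B (weight 1) produce the repeating bucket
+ * pattern A, A, B across the table.
+ */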
+
+
+/*
+ * Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
+{
+ int i;
+ struct ip_vs_sh_bucket *b;
+ struct ip_vs_dest *dest;
+
+ b = &s->buckets[0];
+ for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
+ }
+ b++;
+ }
+}
+
+
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_sh_state *s;
+
+ /* allocate the SH table for this service */
+ s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
+ if (s == NULL)
+ return -ENOMEM;
+
+ svc->sched_data = s;
+ IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for "
+ "current service\n",
+ sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+ /* assign the hash buckets with current dests */
+ ip_vs_sh_reassign(s, svc);
+
+ return 0;
+}
+
+
+static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_sh_state *s = svc->sched_data;
+
+ /* got to clean up hash buckets here */
+ ip_vs_sh_flush(s);
+
+ /* release the table itself */
+ kfree_rcu(s, rcu_head);
+ IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n",
+ sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+}
+
+
+static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_sh_state *s = svc->sched_data;
+
+ /* assign the hash buckets with the updated service */
+ ip_vs_sh_reassign(s, svc);
+
+ return 0;
+}
+
+
+/* Helper function to get port number */
+static inline __be16
+ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
+{
+ __be16 _ports[2], *ports;
+
+ /* At this point we know that we have a valid packet of some kind.
+ * Because ICMP packets are only guaranteed to have the first 8
+ * bytes, let's just grab the ports. Fortunately they're in the
+ * same position for all three of the protocols we care about.
+ */
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+ &_ports);
+ if (unlikely(!ports))
+ return 0;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ return ports[0];
+ else
+ return ports[1];
+ default:
+ return 0;
+ }
+}
+
+
+/*
+ * Source Hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_sh_state *s;
+ __be16 port = 0;
+ const union nf_inet_addr *hash_addr;
+
+ hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
+
+ IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+ if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
+ port = ip_vs_sh_get_port(skb, iph);
+
+ s = (struct ip_vs_sh_state *) svc->sched_data;
+
+ if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
+ dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
+ else
+ dest = ip_vs_sh_get(svc, s, hash_addr, port);
+
+ if (!dest) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
+ IP_VS_DBG_ADDR(svc->af, hash_addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port));
+
+ return dest;
+}
+
+
+/*
+ * IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+ .name = "sh",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+ .init_service = ip_vs_sh_init_svc,
+ .done_service = ip_vs_sh_done_svc,
+ .add_dest = ip_vs_sh_dest_changed,
+ .del_dest = ip_vs_sh_dest_changed,
+ .upd_dest = ip_vs_sh_dest_changed,
+ .schedule = ip_vs_sh_schedule,
+};
+
+
+static int __init ip_vs_sh_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+static void __exit ip_vs_sh_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+ synchronize_rcu();
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
new file mode 100644
index 000000000..e1dea9a82
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -0,0 +1,2051 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the NetFilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Version 1 is capable of handling both version 0 and 1 messages.
+ * Version 0 is the plain old format.
+ * Note: Version 0 receivers will just drop Version 1 messages.
+ * Version 1 is capable of handling IPv6, persistence data,
+ * time-outs, and firewall marks.
+ * In Version 1, "ip_vs_sync_conn_options" is sent in network order.
+ * Version 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions: a Message is a complete datagram,
+ * a Sync_conn is a part of a Message, and
+ * Param Data is an option to a Sync_conn.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync: sync connection info from master load balancer to backups
+ * through multicast
+ *
+ * Changes:
+ * Alexandre Cassen : Added master & backup support at a time.
+ * Alexandre Cassen : Added SyncID support for incoming sync
+ * messages filtering.
+ * Justin Ossevoort : Fix endian problem on sync message size.
+ * Hans Schillstrom : Added Version 1: i.e. IPv6,
+ * Persistence support, fwmark and time-out.
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/inetdevice.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h> /* for ip_mc_join_group */
+#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+
+#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
+#define IP_VS_SYNC_PORT 8848 /* multicast port */
+
+#define SYNC_PROTO_VER 1 /* Protocol version in header */
+
+static struct lock_class_key __ipvs_sync_key;
+/*
+ * IPVS sync connection entry
+ * Version 0, i.e. original version.
+ */
+struct ip_vs_sync_conn_v0 {
+ __u8 reserved;
+
+ /* Protocol, addresses and port numbers */
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+
+ /* Flags and state transition */
+ __be16 flags; /* status flags */
+ __be16 state; /* state info */
+
+ /* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options {
+ struct ip_vs_seq in_seq; /* incoming seq. struct */
+ struct ip_vs_seq out_seq; /* outgoing seq. struct */
+};
+
+/*
+ Sync Connection format (sync_conn)
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Protocol | Ver. | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | State | cport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | vport | dport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | fwmark |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | timeout (in sec.) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ... |
+ | IP-Addresses (v4 or v6) |
+ | ... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ Optional Parameters.
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param. Type | Param. Length | Param. data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+ | ... |
+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | Param Type | Param. Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param data |
+ | Last Param data should be padded for 32 bit alignment |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ struct in6_addr caddr; /* client address */
+ struct in6_addr vaddr; /* virtual address */
+ struct in6_addr daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+ struct ip_vs_sync_v4 v4;
+ struct ip_vs_sync_v6 v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6 0
+#define STYPE_F_INET6 (1 << STYPE_INET6)
+
+#define SVER_SHIFT 12 /* Shift to get version */
+#define SVER_MASK 0x0fff /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA 1
+#define IPVS_OPT_PE_DATA 2
+#define IPVS_OPT_PE_NAME 3
+#define IPVS_OPT_PARAM 7
+
+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
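+
+/* Illustrative, matching the receive path later in this file: ver_size
+ * packs the per-connection version into its top 4 bits and the entry
+ * size into the low 12 bits, carried in network order:
+ *
+ *	size    = ntohs(s->v4.ver_size) & SVER_MASK;
+ *	version = ntohs(s->v4.ver_size) >> SVER_SHIFT;
+ */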
+
+struct ip_vs_sync_thread_data {
+ struct task_struct *task;
+ struct netns_ipvs *ipvs;
+ struct socket *sock;
+ char *buf;
+ int id;
+};
+
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
+#define FULL_CONN_SIZE \
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
+
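+/* For reference, derived from the structs above: a version 0 entry is
+ * 24 bytes and the in/out sequence options add another 24 bytes, so
+ * FULL_CONN_SIZE is 48 bytes per synced connection.
+ */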
+
+/*
+  The master multicasts messages (Datagrams) to the backup load balancers
+ in the following format.
+
+ Version 1:
+  Note: the first byte must be zero, so version 0 receivers will drop the packet.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | 0 | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | Version | Reserved, set to Zero |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | IPVS Sync Connection (1) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | . |
+ ~ . ~
+ | . |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | IPVS Sync Connection (n) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IPVS Sync Connection (1) |
+*/
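+
+/* Worked example, derived from struct ip_vs_sync_mesg and struct
+ * ip_vs_sync_v4 in this file: the Version 1 header is 8 bytes and an
+ * IPv4 sync_conn without options is 36 bytes, so the smallest
+ * one-connection datagram carries size = 44 and nr_conns = 1.
+ */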
+
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
+ __u8 nr_conns;
+ __u8 syncid;
+ __be16 size;
+
+ /* ip_vs_sync_conn entries start here */
+};
+
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+ __u8 reserved; /* must be zero */
+ __u8 syncid;
+ __be16 size;
+ __u8 nr_conns;
+ __s8 version; /* SYNC_PROTO_VER */
+ __u16 spare;
+ /* ip_vs_sync_conn entries start here */
+};
+
+union ipvs_sockaddr {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+};
+
+struct ip_vs_sync_buff {
+ struct list_head list;
+ unsigned long firstuse;
+
+ /* pointers for the message data */
+ struct ip_vs_sync_mesg *mesg;
+ unsigned char *head;
+ unsigned char *end;
+};
+
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+ memset(ho, 0, sizeof(*ho));
+ ho->init_seq = get_unaligned_be32(&no->init_seq);
+ ho->delta = get_unaligned_be32(&no->delta);
+ ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+ put_unaligned_be32(ho->init_seq, &no->init_seq);
+ put_unaligned_be32(ho->delta, &no->delta);
+ put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
+
+static inline struct ip_vs_sync_buff *
+sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
+{
+ struct ip_vs_sync_buff *sb;
+
+ spin_lock_bh(&ipvs->sync_lock);
+ if (list_empty(&ms->sync_queue)) {
+ sb = NULL;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ } else {
+ sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
+ list);
+ list_del(&sb->list);
+ ms->sync_queue_len--;
+ if (!ms->sync_queue_len)
+ ms->sync_queue_delay = 0;
+ }
+ spin_unlock_bh(&ipvs->sync_lock);
+
+ return sb;
+}
+
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
+{
+ struct ip_vs_sync_buff *sb;
+
+	sb = kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC);
+	if (!sb)
+		return NULL;
+
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
+ if (!sb->mesg) {
+ kfree(sb);
+ return NULL;
+ }
+	sb->mesg->reserved = 0; /* was nr_conns in v0; must be zero now */
+ sb->mesg->version = SYNC_PROTO_VER;
+ sb->mesg->syncid = ipvs->mcfg.syncid;
+ sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
+ sb->mesg->nr_conns = 0;
+ sb->mesg->spare = 0;
+ sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+ sb->end = (unsigned char *)sb->mesg + len;
+
+ sb->firstuse = jiffies;
+ return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
+{
+ kfree(sb->mesg);
+ kfree(sb);
+}
+
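+/* Queue the filled buffer for the master thread. Wakeups are batched:
+ * the delayed master_wakeup_work is armed when the queue becomes
+ * non-empty, and the thread is woken directly only after
+ * IPVS_SYNC_WAKEUP_RATE buffers have accumulated.
+ */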
+static inline void sb_queue_tail(struct netns_ipvs *ipvs,
+ struct ipvs_master_sync_state *ms)
+{
+ struct ip_vs_sync_buff *sb = ms->sync_buff;
+
+ spin_lock(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER &&
+ ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
+ if (!ms->sync_queue_len)
+ schedule_delayed_work(&ms->master_wakeup_work,
+ max(IPVS_SYNC_SEND_DELAY, 1));
+ ms->sync_queue_len++;
+ list_add_tail(&sb->list, &ms->sync_queue);
+ if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
+ wake_up_process(ipvs->master_tinfo[id].task);
+ }
+ } else
+ ip_vs_sync_buff_release(sb);
+ spin_unlock(&ipvs->sync_lock);
+}
+
+/*
+ * Get the current sync buffer if it has been created for more
+ * than the specified time or the specified time is zero.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
+ unsigned long time)
+{
+ struct ip_vs_sync_buff *sb;
+
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ sb = ms->sync_buff;
+ if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
+ ms->sync_buff = NULL;
+ __set_current_state(TASK_RUNNING);
+ } else
+ sb = NULL;
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+ return sb;
+}
+
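+/* Spread connections over the master threads by hashing the cp
+ * pointer: the shift drops the low bits that are equal for all
+ * slab-allocated connections, and threads_mask (thread count - 1)
+ * selects the thread id.
+ */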
+static inline int
+select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
+{
+ return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
+}
+
+/*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
+{
+ struct ip_vs_sync_buff *sb;
+ struct ip_vs_sync_mesg_v0 *mesg;
+
+	sb = kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC);
+	if (!sb)
+		return NULL;
+
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
+ if (!sb->mesg) {
+ kfree(sb);
+ return NULL;
+ }
+ mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+ mesg->nr_conns = 0;
+ mesg->syncid = ipvs->mcfg.syncid;
+ mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
+ sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+ sb->end = (unsigned char *)mesg + len;
+ sb->firstuse = jiffies;
+ return sb;
+}
+
+/* Check if connection is controlled by persistence */
+static inline bool in_persistence(struct ip_vs_conn *cp)
+{
+ for (cp = cp->control; cp; cp = cp->control) {
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+ return true;
+ }
+ return false;
+}
+
+/* Check if conn should be synced.
+ * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
+ * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
+ * sync_retries times with period of sync_refresh_period/8
+ * - (2) if both sync_refresh_period and sync_period are 0 send sync only
+ * for state changes or only once when pkts matches sync_threshold
+ * - (3) templates: rate can be reduced only with sync_refresh_period or
+ * with (2)
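+ *
+ * The refresh state is packed into cp->sync_endtime: the low 2 bits
+ * count the retries (0..3) and the remaining bits hold the next
+ * deadline, hence n = (now + cp->timeout) & ~3UL and
+ * retries = orig & 3 below.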
+ */
+static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
+ struct ip_vs_conn *cp, int pkts)
+{
+ unsigned long orig = READ_ONCE(cp->sync_endtime);
+ unsigned long now = jiffies;
+ unsigned long n = (now + cp->timeout) & ~3UL;
+ unsigned int sync_refresh_period;
+ int sync_period;
+ int force;
+
+ /* Check if we sync in current state */
+ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
+ force = 0;
+ else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
+ return 0;
+ else if (likely(cp->protocol == IPPROTO_TCP)) {
+ if (!((1 << cp->state) &
+ ((1 << IP_VS_TCP_S_ESTABLISHED) |
+ (1 << IP_VS_TCP_S_FIN_WAIT) |
+ (1 << IP_VS_TCP_S_CLOSE) |
+ (1 << IP_VS_TCP_S_CLOSE_WAIT) |
+ (1 << IP_VS_TCP_S_TIME_WAIT))))
+ return 0;
+ force = cp->state != cp->old_state;
+ if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
+ goto set;
+ } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
+ if (!((1 << cp->state) &
+ ((1 << IP_VS_SCTP_S_ESTABLISHED) |
+ (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
+ (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
+ (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
+ (1 << IP_VS_SCTP_S_CLOSED))))
+ return 0;
+ force = cp->state != cp->old_state;
+ if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
+ goto set;
+ } else {
+ /* UDP or another protocol with single state */
+ force = 0;
+ }
+
+ sync_refresh_period = sysctl_sync_refresh_period(ipvs);
+ if (sync_refresh_period > 0) {
+ long diff = n - orig;
+ long min_diff = max(cp->timeout >> 1, 10UL * HZ);
+
+ /* Avoid sync if difference is below sync_refresh_period
+ * and below the half timeout.
+ */
+ if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
+ int retries = orig & 3;
+
+ if (retries >= sysctl_sync_retries(ipvs))
+ return 0;
+ if (time_before(now, orig - cp->timeout +
+ (sync_refresh_period >> 3)))
+ return 0;
+ n |= retries + 1;
+ }
+ }
+ sync_period = sysctl_sync_period(ipvs);
+ if (sync_period > 0) {
+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
+ pkts % sync_period != sysctl_sync_threshold(ipvs))
+ return 0;
+ } else if (!sync_refresh_period &&
+ pkts != sysctl_sync_threshold(ipvs))
+ return 0;
+
+set:
+ cp->old_state = cp->state;
+ n = cmpxchg(&cp->sync_endtime, orig, n);
+ return n == orig || force;
+}
+
+/*
+ * Version 0, selectable via sysctl.
+ * Add ip_vs_conn information into the current sync_buff.
+ */
+static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
+ int pkts)
+{
+ struct ip_vs_sync_mesg_v0 *m;
+ struct ip_vs_sync_conn_v0 *s;
+ struct ip_vs_sync_buff *buff;
+ struct ipvs_master_sync_state *ms;
+ int id;
+ unsigned int len;
+
+ if (unlikely(cp->af != AF_INET))
+ return;
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+ return;
+
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+ return;
+ }
+
+ id = select_master_thread_id(ipvs, cp);
+ ms = &ipvs->ms[id];
+ buff = ms->sync_buff;
+ len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+ SIMPLE_CONN_SIZE;
+ if (buff) {
+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+ /* Send buffer if it is for v1 */
+ if (buff->head + len > buff->end || !m->nr_conns) {
+ sb_queue_tail(ipvs, ms);
+ ms->sync_buff = NULL;
+ buff = NULL;
+ }
+ }
+ if (!buff) {
+ buff = ip_vs_sync_buff_create_v0(ipvs, len);
+ if (!buff) {
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+ pr_err("ip_vs_sync_buff_create failed.\n");
+ return;
+ }
+ ms->sync_buff = buff;
+ }
+
+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+ s = (struct ip_vs_sync_conn_v0 *) buff->head;
+
+ /* copy members */
+ s->reserved = 0;
+ s->protocol = cp->protocol;
+ s->cport = cp->cport;
+ s->vport = cp->vport;
+ s->dport = cp->dport;
+ s->caddr = cp->caddr.ip;
+ s->vaddr = cp->vaddr.ip;
+ s->daddr = cp->daddr.ip;
+ s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->state = htons(cp->state);
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ struct ip_vs_sync_conn_options *opt =
+ (struct ip_vs_sync_conn_options *)&s[1];
+ memcpy(opt, &cp->sync_conn_opt, sizeof(*opt));
+ }
+
+ m->nr_conns++;
+ m->size = htons(ntohs(m->size) + len);
+ buff->head += len;
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+
+	/* synchronize its controller if it has one */
+ cp = cp->control;
+ if (cp) {
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+ pkts = atomic_inc_return(&cp->in_pkts);
+ else
+ pkts = sysctl_sync_threshold(ipvs);
+ ip_vs_sync_conn(ipvs, cp, pkts);
+ }
+}
+
+/*
+ * Add ip_vs_conn information into the current sync_buff.
+ * Called by ip_vs_in.
+ * Sends Version 1 messages.
+ */
+void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
+{
+ struct ip_vs_sync_mesg *m;
+ union ip_vs_sync_conn *s;
+ struct ip_vs_sync_buff *buff;
+ struct ipvs_master_sync_state *ms;
+ int id;
+ __u8 *p;
+ unsigned int len, pe_name_len, pad;
+
+ /* Handle old version of the protocol */
+ if (sysctl_sync_ver(ipvs) == 0) {
+ ip_vs_sync_conn_v0(ipvs, cp, pkts);
+ return;
+ }
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ goto control;
+sloop:
+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+ goto control;
+
+ /* Sanity checks */
+ pe_name_len = 0;
+ if (cp->pe_data_len) {
+ if (!cp->pe_data || !cp->dest) {
+ IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+ return;
+ }
+ pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+ }
+
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+ return;
+ }
+
+ id = select_master_thread_id(ipvs, cp);
+ ms = &ipvs->ms[id];
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ len = sizeof(struct ip_vs_sync_v6);
+ else
+#endif
+ len = sizeof(struct ip_vs_sync_v4);
+
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+ len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+ if (cp->pe_data_len)
+ len += cp->pe_data_len + 2; /* + Param hdr field */
+ if (pe_name_len)
+ len += pe_name_len + 2;
+
+	/* check if there is space for this one */
+ pad = 0;
+ buff = ms->sync_buff;
+ if (buff) {
+ m = buff->mesg;
+ pad = (4 - (size_t) buff->head) & 3;
+ /* Send buffer if it is for v0 */
+ if (buff->head + len + pad > buff->end || m->reserved) {
+ sb_queue_tail(ipvs, ms);
+ ms->sync_buff = NULL;
+ buff = NULL;
+ pad = 0;
+ }
+ }
+
+ if (!buff) {
+ buff = ip_vs_sync_buff_create(ipvs, len);
+ if (!buff) {
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+ pr_err("ip_vs_sync_buff_create failed.\n");
+ return;
+ }
+ ms->sync_buff = buff;
+ m = buff->mesg;
+ }
+
+ p = buff->head;
+ buff->head += pad + len;
+ m->size = htons(ntohs(m->size) + pad + len);
+	/* Add any padding left over from the previous sync_conn */
+ while (pad--)
+ *(p++) = 0;
+
+ s = (union ip_vs_sync_conn *)p;
+
+ /* Set message type & copy members */
+ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+ s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
+ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->v4.state = htons(cp->state);
+ s->v4.protocol = cp->protocol;
+ s->v4.cport = cp->cport;
+ s->v4.vport = cp->vport;
+ s->v4.dport = cp->dport;
+ s->v4.fwmark = htonl(cp->fwmark);
+ s->v4.timeout = htonl(cp->timeout / HZ);
+ m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6) {
+ p += sizeof(struct ip_vs_sync_v6);
+ s->v6.caddr = cp->caddr.in6;
+ s->v6.vaddr = cp->vaddr.in6;
+ s->v6.daddr = cp->daddr.in6;
+ } else
+#endif
+ {
+ p += sizeof(struct ip_vs_sync_v4); /* options ptr */
+ s->v4.caddr = cp->caddr.ip;
+ s->v4.vaddr = cp->vaddr.ip;
+ s->v4.daddr = cp->daddr.ip;
+ }
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ *(p++) = IPVS_OPT_SEQ_DATA;
+ *(p++) = sizeof(struct ip_vs_sync_conn_options);
+ hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+ p += sizeof(struct ip_vs_seq);
+ hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+ p += sizeof(struct ip_vs_seq);
+ }
+ /* Handle pe data */
+ if (cp->pe_data_len && cp->pe_data) {
+ *(p++) = IPVS_OPT_PE_DATA;
+ *(p++) = cp->pe_data_len;
+ memcpy(p, cp->pe_data, cp->pe_data_len);
+ p += cp->pe_data_len;
+ if (pe_name_len) {
+ /* Add PE_NAME */
+ *(p++) = IPVS_OPT_PE_NAME;
+ *(p++) = pe_name_len;
+ memcpy(p, cp->pe->name, pe_name_len);
+ p += pe_name_len;
+ }
+ }
+
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+
+control:
+	/* synchronize its controller if it has one */
+ cp = cp->control;
+ if (!cp)
+ return;
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+ pkts = atomic_inc_return(&cp->in_pkts);
+ else
+ pkts = sysctl_sync_threshold(ipvs);
+ goto sloop;
+}
+
+/*
+ * fill_param used by version 1
+ */
+static inline int
+ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
+ struct ip_vs_conn_param *p,
+ __u8 *pe_data, unsigned int pe_data_len,
+ __u8 *pe_name, unsigned int pe_name_len)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
+ (const union nf_inet_addr *)&sc->v6.caddr,
+ sc->v6.cport,
+ (const union nf_inet_addr *)&sc->v6.vaddr,
+ sc->v6.vport, p);
+ else
+#endif
+ ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
+ (const union nf_inet_addr *)&sc->v4.caddr,
+ sc->v4.cport,
+ (const union nf_inet_addr *)&sc->v4.vaddr,
+ sc->v4.vport, p);
+ /* Handle pe data */
+ if (pe_data_len) {
+ if (pe_name_len) {
+ char buff[IP_VS_PENAME_MAXLEN+1];
+
+ memcpy(buff, pe_name, pe_name_len);
+ buff[pe_name_len]=0;
+ p->pe = __ip_vs_pe_getbyname(buff);
+ if (!p->pe) {
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+ buff);
+ return 1;
+ }
+ } else {
+ IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+ return 1;
+ }
+
+ p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
+ if (!p->pe_data) {
+ module_put(p->pe->module);
+ return -ENOMEM;
+ }
+ p->pe_data_len = pe_data_len;
+ }
+ return 0;
+}
+
+/*
+ * Connection Add / Update.
+ * Common for version 0 and 1 reception of backup sync_conns.
+ * Param: ...
+ * timeout is in seconds.
+ */
+static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
+ unsigned int flags, unsigned int state,
+ unsigned int protocol, unsigned int type,
+ const union nf_inet_addr *daddr, __be16 dport,
+ unsigned long timeout, __u32 fwmark,
+ struct ip_vs_sync_conn_options *opt)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp;
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ cp = ip_vs_conn_in_get(param);
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
+ cp = ip_vs_ct_in_get(param);
+ }
+
+ if (cp) {
+ /* Free pe_data */
+ kfree(param->pe_data);
+
+ dest = cp->dest;
+ spin_lock_bh(&cp->lock);
+ if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
+ !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
+ if (flags & IP_VS_CONN_F_INACTIVE) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ } else {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ }
+ }
+ flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
+ flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
+ cp->flags = flags;
+ spin_unlock_bh(&cp->lock);
+ if (!dest)
+ ip_vs_try_bind_dest(cp);
+ } else {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ rcu_read_lock();
+ /* This function is only invoked by the synchronization
+ * code. We do not currently support heterogeneous pools
+ * with synchronization, so we can make the assumption that
+ * the svc_af is the same as the dest_af
+ */
+ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
+ param->vaddr, param->vport, protocol,
+ fwmark, flags);
+
+ cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+ fwmark);
+ rcu_read_unlock();
+ if (!cp) {
+ kfree(param->pe_data);
+ IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+ return;
+ }
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ kfree(param->pe_data);
+ }
+
+ if (opt) {
+ cp->in_seq = opt->in_seq;
+ cp->out_seq = opt->out_seq;
+ }
+ atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
+ cp->state = state;
+ cp->old_state = cp->state;
+	/*
+	 * For Ver 0 message style:
+	 * - it is not possible to recover the right timeout for templates
+	 * - we cannot find the right fwmark virtual service. If needed,
+	 *   we could do it for non-fwmark persistent services.
+	 * Ver 1 message style has no such problem.
+	 */
+ if (timeout) {
+ if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+ timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+ cp->timeout = timeout*HZ;
+ } else {
+ struct ip_vs_proto_data *pd;
+
+ pd = ip_vs_proto_data_get(ipvs, protocol);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+ cp->timeout = pd->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ }
+ ip_vs_conn_put(cp);
+}
+
+/*
+ * Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
+ const size_t buflen)
+{
+ struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+ struct ip_vs_sync_conn_v0 *s;
+ struct ip_vs_sync_conn_options *opt;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ char *p;
+ int i;
+
+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
+ for (i=0; i<m->nr_conns; i++) {
+ unsigned int flags, state;
+
+ if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
+ return;
+ }
+ s = (struct ip_vs_sync_conn_v0 *) p;
+ flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+ flags &= ~IP_VS_CONN_F_HASHED;
+ if (flags & IP_VS_CONN_F_SEQ_MASK) {
+ opt = (struct ip_vs_sync_conn_options *)&s[1];
+ p += FULL_CONN_SIZE;
+ if (p > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
+ return;
+ }
+ } else {
+ opt = NULL;
+ p += SIMPLE_CONN_SIZE;
+ }
+
+ state = ntohs(s->state);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->protocol);
+ if (!pp) {
+ IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
+ s->protocol);
+ continue;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
+ pp->name, state);
+ continue;
+ }
+ } else {
+ if (state >= IP_VS_CTPL_S_LAST)
+ IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
+ state);
+ }
+
+ ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
+ (const union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (const union nf_inet_addr *)&s->vaddr,
+ s->vport, &param);
+
+ /* Send timeout as Zero */
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET,
+ (union nf_inet_addr *)&s->daddr, s->dport,
+ 0, 0, opt);
+ }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+ __u32 *opt_flags,
+ struct ip_vs_sync_conn_options *opt)
+{
+ struct ip_vs_sync_conn_options *topt;
+
+ topt = (struct ip_vs_sync_conn_options *)p;
+
+ if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+ IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+ return -EINVAL;
+ }
+ if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+ IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+ return -EINVAL;
+ }
+ ntoh_seq(&topt->in_seq, &opt->in_seq);
+ ntoh_seq(&topt->out_seq, &opt->out_seq);
+ *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+ return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+ __u8 **data, unsigned int maxlen,
+ __u32 *opt_flags, __u32 flag)
+{
+ if (plen > maxlen) {
+ IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+ return -EINVAL;
+ }
+ if (*opt_flags & flag) {
+ IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+ return -EINVAL;
+ }
+ *data_len = plen;
+ *data = p;
+ *opt_flags |= flag;
+ return 0;
+}
+
+/*
+ * Process a Version 1 sync connection
+ */
+static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
+{
+ struct ip_vs_sync_conn_options opt;
+ union ip_vs_sync_conn *s;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ __u32 flags;
+ unsigned int af, state, pe_data_len=0, pe_name_len=0;
+ __u8 *pe_data=NULL, *pe_name=NULL;
+ __u32 opt_flags=0;
+ int retc=0;
+
+ s = (union ip_vs_sync_conn *) p;
+
+ if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ af = AF_INET6;
+ p += sizeof(struct ip_vs_sync_v6);
+#else
+ IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+ retc = 10;
+ goto out;
+#endif
+ } else if (!s->v4.type) {
+ af = AF_INET;
+ p += sizeof(struct ip_vs_sync_v4);
+ } else {
+ return -10;
+ }
+ if (p > msg_end)
+ return -20;
+
+	/* Process optional params, checking Type & Len. */
+ while (p < msg_end) {
+ int ptype;
+ int plen;
+
+ if (p+2 > msg_end)
+ return -30;
+ ptype = *(p++);
+ plen = *(p++);
+
+ if (!plen || ((p + plen) > msg_end))
+ return -40;
+ /* Handle seq option p = param data */
+ switch (ptype & ~IPVS_OPT_F_PARAM) {
+ case IPVS_OPT_SEQ_DATA:
+ if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+ return -50;
+ break;
+
+ case IPVS_OPT_PE_DATA:
+ if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+ IP_VS_PEDATA_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_DATA))
+ return -60;
+ break;
+
+ case IPVS_OPT_PE_NAME:
+ if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+ IP_VS_PENAME_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_NAME))
+ return -70;
+ break;
+
+ default:
+ /* Param data mandatory ? */
+ if (!(ptype & IPVS_OPT_F_PARAM)) {
+ IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+ ptype & ~IPVS_OPT_F_PARAM);
+ retc = 20;
+ goto out;
+ }
+ }
+ p += plen; /* Next option */
+ }
+
+ /* Get flags and Mask off unsupported */
+ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+ flags |= IP_VS_CONN_F_SYNC;
+ state = ntohs(s->v4.state);
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->v4.protocol);
+ if (!pp) {
+ IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+ s->v4.protocol);
+ retc = 30;
+ goto out;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+ pp->name, state);
+ retc = 40;
+ goto out;
+ }
+ } else {
+ if (state >= IP_VS_CTPL_S_LAST)
+ IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
+ state);
+ }
+ if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
+ pe_data_len, pe_name, pe_name_len)) {
+ retc = 50;
+ goto out;
+ }
+	/* If compiled without IPv6 support, silently skip IPv6 entries */
+ if (af == AF_INET)
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af,
+ (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+ ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
+#ifdef CONFIG_IP_VS_IPV6
+ else
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af,
+ (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+ ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
+#endif
+ ip_vs_pe_put(param.pe);
+ return 0;
+ /* Error exit */
+out:
+ IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+ return retc;
+}
+
+/*
+ * Process received multicast message and create the corresponding
+ * ip_vs_conn entries.
+ * Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
+ const size_t buflen)
+{
+ struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+ __u8 *p, *msg_end;
+ int i, nr_conns;
+
+ if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+ IP_VS_DBG(2, "BACKUP, message header too short\n");
+ return;
+ }
+
+ if (buflen != ntohs(m2->size)) {
+ IP_VS_DBG(2, "BACKUP, bogus message size\n");
+ return;
+ }
+ /* SyncID sanity check */
+ if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
+ IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+ return;
+ }
+ /* Handle version 1 message */
+ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+ && (m2->spare == 0)) {
+
+ msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+ nr_conns = m2->nr_conns;
+
+ for (i=0; i<nr_conns; i++) {
+ union ip_vs_sync_conn *s;
+ unsigned int size;
+ int retc;
+
+ p = msg_end;
+ if (p + sizeof(s->v4) > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n");
+ return;
+ }
+ s = (union ip_vs_sync_conn *)p;
+ size = ntohs(s->v4.ver_size) & SVER_MASK;
+ msg_end = p + size;
+ /* Basic sanity checks */
+ if (msg_end > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
+ return;
+ }
+ if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+ ntohs(s->v4.ver_size) >> SVER_SHIFT);
+ return;
+ }
+ /* Process a single sync_conn */
+ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
+ if (retc < 0) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+ retc);
+ return;
+ }
+ /* Make sure we have 32 bit alignment */
+ msg_end = p + ((size + 3) & ~3);
+ }
+ } else {
+ /* Old type of message */
+ ip_vs_process_message_v0(ipvs, buffer, buflen);
+ return;
+ }
+}
+
+
+/*
+ * Setup sndbuf (mode=1) or rcvbuf (mode=0)
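+ * The value is doubled, mirroring what the SO_SNDBUF/SO_RCVBUF
+ * setsockopt handlers do, to leave headroom for bookkeeping overhead.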
+ */
+static void set_sock_size(struct sock *sk, int mode, int val)
+{
+ /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
+ /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
+ lock_sock(sk);
+ if (mode) {
+ val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+ READ_ONCE(sysctl_wmem_max));
+ sk->sk_sndbuf = val * 2;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ } else {
+ val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+ READ_ONCE(sysctl_rmem_max));
+ sk->sk_rcvbuf = val * 2;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+ release_sock(sk);
+}
+
+/*
+ * Setup loopback of outgoing multicasts on a sending socket
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
+ lock_sock(sk);
+ inet->mc_loop = loop ? 1 : 0;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_LOOP */
+ np->mc_loop = loop ? 1 : 0;
+ }
+#endif
+ release_sock(sk);
+}
+
+/*
+ * Specify TTL for outgoing multicasts on a sending socket
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
+ lock_sock(sk);
+ inet->mc_ttl = ttl;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_HOPS */
+ np->mcast_hops = ttl;
+ }
+#endif
+ release_sock(sk);
+}
+
+/* Control fragmentation of messages */
+static void set_mcast_pmtudisc(struct sock *sk, int val)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
+ lock_sock(sk);
+ inet->pmtudisc = val;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MTU_DISCOVER */
+ np->pmtudisc = val;
+ }
+#endif
+ release_sock(sk);
+}
+
+/*
+ * Specify default interface for outgoing multicasts
+ */
+static int set_mcast_if(struct sock *sk, struct net_device *dev)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+
+ lock_sock(sk);
+ inet->mc_index = dev->ifindex;
+ /* inet->mc_addr = 0; */
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_IF */
+ np->mcast_oif = dev->ifindex;
+ }
+#endif
+ release_sock(sk);
+
+ return 0;
+}
+
+
+/*
+ * Join a multicast group.
+ * The group is specified by a class D multicast address, 224.0.0.0/4,
+ * in the in_addr structure passed in as a parameter.
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
+{
+ struct ip_mreqn mreq;
+ int ret;
+
+ memset(&mreq, 0, sizeof(mreq));
+ memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+
+ mreq.imr_ifindex = dev->ifindex;
+
+ lock_sock(sk);
+ ret = ip_mc_join_group(sk, &mreq);
+ release_sock(sk);
+
+ return ret;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
+ struct net_device *dev)
+{
+ int ret;
+
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+
+ lock_sock(sk);
+ ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
+ release_sock(sk);
+
+ return ret;
+}
+#endif
+
+static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
+{
+ __be32 addr;
+ struct sockaddr_in sin;
+
+ addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+ if (!addr)
+		pr_err("You probably need to specify an IP address on "
+		       "the multicast interface.\n");
+
+ IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
+ dev->name, &addr);
+
+ /* Now bind the socket with the address of multicast interface */
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = addr;
+ sin.sin_port = 0;
+
+ return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+}
+
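+/* Build the multicast sockaddr for sync thread "id"; each thread uses
+ * its own port, mcast_port + id, keeping the per-thread traffic apart.
+ */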
+static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
+ struct ipvs_sync_daemon_cfg *c, int id)
+{
+ if (AF_INET6 == c->mcast_af) {
+ sa->in6 = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(c->mcast_port + id),
+ };
+ sa->in6.sin6_addr = c->mcast_group.in6;
+ *salen = sizeof(sa->in6);
+ } else {
+ sa->in = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_port = htons(c->mcast_port + id),
+ };
+ sa->in.sin_addr = c->mcast_group.in;
+ *salen = sizeof(sa->in);
+ }
+}
+
+/*
+ * Set up sending multicast socket over UDP
+ */
+static int make_send_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
+{
+ /* multicast addr */
+ union ipvs_sockaddr mcast_addr;
+ struct socket *sock;
+ int result, salen;
+
+ /* First create a socket */
+ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
+ if (result < 0) {
+ pr_err("Error during creation of socket; terminating\n");
+ goto error;
+ }
+ *sock_ret = sock;
+ result = set_mcast_if(sock->sk, dev);
+ if (result < 0) {
+ pr_err("Error setting outbound mcast interface\n");
+ goto error;
+ }
+
+ set_mcast_loop(sock->sk, 0);
+ set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
+ /* Allow fragmentation if MTU changes */
+ set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
+ result = sysctl_sync_sock_size(ipvs);
+ if (result > 0)
+ set_sock_size(sock->sk, 1, result);
+
+ if (AF_INET == ipvs->mcfg.mcast_af)
+ result = bind_mcastif_addr(sock, dev);
+ else
+ result = 0;
+ if (result < 0) {
+ pr_err("Error binding address of the mcast interface\n");
+ goto error;
+ }
+
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
+ result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ salen, 0);
+ if (result < 0) {
+ pr_err("Error connecting to the multicast addr\n");
+ goto error;
+ }
+
+ return 0;
+
+error:
+ return result;
+}
+
+
+/*
+ * Set up receiving multicast socket over UDP
+ */
+static int make_receive_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
+{
+ /* multicast addr */
+ union ipvs_sockaddr mcast_addr;
+ struct socket *sock;
+ int result, salen;
+
+ /* First create a socket */
+ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
+ if (result < 0) {
+ pr_err("Error during creation of socket; terminating\n");
+ goto error;
+ }
+ *sock_ret = sock;
+	/* This is equivalent to the SO_REUSEADDR option in user-space */
+ sock->sk->sk_reuse = SK_CAN_REUSE;
+ result = sysctl_sync_sock_size(ipvs);
+ if (result > 0)
+ set_sock_size(sock->sk, 0, result);
+
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+ sock->sk->sk_bound_dev_if = dev->ifindex;
+ result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen);
+ if (result < 0) {
+ pr_err("Error binding to the multicast addr\n");
+ goto error;
+ }
+
+ /* join the multicast group */
+#ifdef CONFIG_IP_VS_IPV6
+ if (ipvs->bcfg.mcast_af == AF_INET6)
+ result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
+ dev);
+ else
+#endif
+ result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
+ dev);
+ if (result < 0) {
+		pr_err("Error joining the multicast group\n");
+ goto error;
+ }
+
+ return 0;
+
+error:
+ return result;
+}
+
+
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+ struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
+ struct kvec iov;
+ int len;
+
+ EnterFunction(7);
+ iov.iov_base = (void *)buffer;
+ iov.iov_len = length;
+
+ len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
+
+ LeaveFunction(7);
+ return len;
+}
+
+static int
+ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
+{
+ int msize;
+ int ret;
+
+ msize = ntohs(msg->size);
+
+ ret = ip_vs_send_async(sock, (char *)msg, msize);
+ if (ret >= 0 || ret == -EAGAIN)
+ return ret;
+ pr_err("ip_vs_send_async error %d\n", ret);
+ return 0;
+}
+
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+ struct msghdr msg = {NULL,};
+ struct kvec iov = {buffer, buflen};
+ int len;
+
+ EnterFunction(7);
+
+ /* Receive a packet */
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, buflen);
+ len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
+ if (len < 0)
+ return len;
+
+ LeaveFunction(7);
+ return len;
+}
+
+/* Wakeup the master thread for sending */
+static void master_wakeup_work_handler(struct work_struct *work)
+{
+ struct ipvs_master_sync_state *ms =
+ container_of(work, struct ipvs_master_sync_state,
+ master_wakeup_work.work);
+ struct netns_ipvs *ipvs = ms->ipvs;
+
+ spin_lock_bh(&ipvs->sync_lock);
+ if (ms->sync_queue_len &&
+ ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
+ ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
+ wake_up_process(ipvs->master_tinfo[id].task);
+ }
+ spin_unlock_bh(&ipvs->sync_lock);
+}
+
+/* Get next buffer to send */
+static inline struct ip_vs_sync_buff *
+next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
+{
+ struct ip_vs_sync_buff *sb;
+
+ sb = sb_dequeue(ipvs, ms);
+ if (sb)
+ return sb;
+ /* Do not delay entries in buffer for more than 2 seconds */
+ return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
+}
+
+static int sync_thread_master(void *data)
+{
+ struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = tinfo->ipvs;
+ struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
+ struct sock *sk = tinfo->sock->sk;
+ struct ip_vs_sync_buff *sb;
+
+ pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
+ "syncid = %d, id = %d\n",
+ ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);
+
+ for (;;) {
+ sb = next_sync_buff(ipvs, ms);
+ if (unlikely(kthread_should_stop()))
+ break;
+ if (!sb) {
+ schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
+ continue;
+ }
+ while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
+ /* (Ab)use interruptible sleep to avoid increasing
+ * the load avg.
+ */
+ __wait_event_interruptible(*sk_sleep(sk),
+ sock_writeable(sk) ||
+ kthread_should_stop());
+ if (unlikely(kthread_should_stop()))
+ goto done;
+ }
+ ip_vs_sync_buff_release(sb);
+ }
+
+done:
+ __set_current_state(TASK_RUNNING);
+ if (sb)
+ ip_vs_sync_buff_release(sb);
+
+ /* clean up the sync_buff queue */
+ while ((sb = sb_dequeue(ipvs, ms)))
+ ip_vs_sync_buff_release(sb);
+ __set_current_state(TASK_RUNNING);
+
+ /* clean up the current sync_buff */
+ sb = get_curr_sync_buff(ipvs, ms, 0);
+ if (sb)
+ ip_vs_sync_buff_release(sb);
+
+ return 0;
+}
+
+
+static int sync_thread_backup(void *data)
+{
+ struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = tinfo->ipvs;
+ struct sock *sk = tinfo->sock->sk;
+ struct udp_sock *up = udp_sk(sk);
+ int len;
+
+ pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
+ "syncid = %d, id = %d\n",
+ ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(*sk_sleep(sk),
+ !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ !skb_queue_empty_lockless(&up->reader_queue) ||
+ kthread_should_stop());
+
+ /* do we have data now? */
+ while (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ !skb_queue_empty_lockless(&up->reader_queue)) {
+ len = ip_vs_receive(tinfo->sock, tinfo->buf,
+ ipvs->bcfg.sync_maxlen);
+ if (len <= 0) {
+ if (len != -EAGAIN)
+ pr_err("receiving message error\n");
+ break;
+ }
+
+ ip_vs_process_message(ipvs, tinfo->buf, len);
+ }
+ }
+
+ return 0;
+}
+
+
+int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
+ int state)
+{
+ struct ip_vs_sync_thread_data *ti = NULL, *tinfo;
+ struct task_struct *task;
+ struct net_device *dev;
+ char *name;
+ int (*threadfn)(void *data);
+ int id = 0, count, hlen;
+ int result = -ENOMEM;
+ u16 mtu, min_mtu;
+
+ IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
+ sizeof(struct ip_vs_sync_conn_v0));
+
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc())
+ return -ENOPROTOOPT;
+
+	/* Do not hold one mutex and then block on another: take rtnl_lock
+	 * and sync_mutex in either order, backing off with trylock to
+	 * avoid an ABBA deadlock.
+	 */
+ for (;;) {
+ rtnl_lock();
+ if (mutex_trylock(&ipvs->sync_mutex))
+ break;
+ rtnl_unlock();
+ mutex_lock(&ipvs->sync_mutex);
+ if (rtnl_trylock())
+ break;
+ mutex_unlock(&ipvs->sync_mutex);
+ }
+
+ if (!ipvs->sync_state) {
+ count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
+ ipvs->threads_mask = count - 1;
+ } else
+ count = ipvs->threads_mask + 1;
+
+ if (c->mcast_af == AF_UNSPEC) {
+ c->mcast_af = AF_INET;
+ c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
+ }
+ if (!c->mcast_port)
+ c->mcast_port = IP_VS_SYNC_PORT;
+ if (!c->mcast_ttl)
+ c->mcast_ttl = 1;
+
+ dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
+ if (!dev) {
+ pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
+ result = -ENODEV;
+ goto out_early;
+ }
+ hlen = (AF_INET6 == c->mcast_af) ?
+ sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
+ sizeof(struct iphdr) + sizeof(struct udphdr);
+ mtu = (state == IP_VS_STATE_BACKUP) ?
+ clamp(dev->mtu, 1500U, 65535U) : 1500U;
+ min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;
+
+ if (c->sync_maxlen)
+ c->sync_maxlen = clamp_t(unsigned int,
+ c->sync_maxlen, min_mtu,
+ 65535 - hlen);
+ else
+ c->sync_maxlen = mtu - hlen;
+
+ if (state == IP_VS_STATE_MASTER) {
+ result = -EEXIST;
+ if (ipvs->ms)
+ goto out_early;
+
+ ipvs->mcfg = *c;
+ name = "ipvs-m:%d:%d";
+ threadfn = sync_thread_master;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ result = -EEXIST;
+ if (ipvs->backup_tinfo)
+ goto out_early;
+
+ ipvs->bcfg = *c;
+ name = "ipvs-b:%d:%d";
+ threadfn = sync_thread_backup;
+ } else {
+ result = -EINVAL;
+ goto out_early;
+ }
+
+ if (state == IP_VS_STATE_MASTER) {
+ struct ipvs_master_sync_state *ms;
+
+ result = -ENOMEM;
+ ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
+ if (!ipvs->ms)
+ goto out;
+ ms = ipvs->ms;
+ for (id = 0; id < count; id++, ms++) {
+ INIT_LIST_HEAD(&ms->sync_queue);
+ ms->sync_queue_len = 0;
+ ms->sync_queue_delay = 0;
+ INIT_DELAYED_WORK(&ms->master_wakeup_work,
+ master_wakeup_work_handler);
+ ms->ipvs = ipvs;
+ }
+ }
+ result = -ENOMEM;
+ ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data),
+ GFP_KERNEL);
+ if (!ti)
+ goto out;
+
+ for (id = 0; id < count; id++) {
+ tinfo = &ti[id];
+ tinfo->ipvs = ipvs;
+ if (state == IP_VS_STATE_BACKUP) {
+ result = -ENOMEM;
+ tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
+ GFP_KERNEL);
+ if (!tinfo->buf)
+ goto out;
+ }
+ tinfo->id = id;
+ if (state == IP_VS_STATE_MASTER)
+ result = make_send_sock(ipvs, id, dev, &tinfo->sock);
+ else
+ result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
+ if (result < 0)
+ goto out;
+
+ task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
+ if (IS_ERR(task)) {
+ result = PTR_ERR(task);
+ goto out;
+ }
+ tinfo->task = task;
+ }
+
+ /* mark as active */
+
+ if (state == IP_VS_STATE_MASTER)
+ ipvs->master_tinfo = ti;
+ else
+ ipvs->backup_tinfo = ti;
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ ipvs->sync_state |= state;
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+
+ return 0;
+
+out:
+ /* We do not need RTNL lock anymore, release it here so that
+ * sock_release below can use rtnl_lock to leave the mcast group.
+ */
+ rtnl_unlock();
+ id = min(id, count - 1);
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->task)
+ kthread_stop(tinfo->task);
+ }
+ }
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ kfree(ipvs->ms);
+ ipvs->ms = NULL;
+ }
+ mutex_unlock(&ipvs->sync_mutex);
+
+ /* No more mutexes, release socks */
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
+ }
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ return result;
+
+out_early:
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ return result;
+}
+
+
+int stop_sync_thread(struct netns_ipvs *ipvs, int state)
+{
+ struct ip_vs_sync_thread_data *ti, *tinfo;
+ int id;
+ int retc = -EINVAL;
+
+ IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+
+ mutex_lock(&ipvs->sync_mutex);
+ if (state == IP_VS_STATE_MASTER) {
+ retc = -ESRCH;
+ if (!ipvs->ms)
+ goto err;
+ ti = ipvs->master_tinfo;
+
+ /*
+ * The lock synchronizes with sb_queue_tail(), so that we don't
+ * add sync buffers to the queue, when we are already in
+ * progress of stopping the master sync daemon.
+ */
+
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ spin_lock(&ipvs->sync_lock);
+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock(&ipvs->sync_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+
+ retc = 0;
+ for (id = ipvs->threads_mask; id >= 0; id--) {
+ struct ipvs_master_sync_state *ms = &ipvs->ms[id];
+ int ret;
+
+ tinfo = &ti[id];
+ pr_info("stopping master sync thread %d ...\n",
+ task_pid_nr(tinfo->task));
+ cancel_delayed_work_sync(&ms->master_wakeup_work);
+ ret = kthread_stop(tinfo->task);
+ if (retc >= 0)
+ retc = ret;
+ }
+ kfree(ipvs->ms);
+ ipvs->ms = NULL;
+ ipvs->master_tinfo = NULL;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ retc = -ESRCH;
+ if (!ipvs->backup_tinfo)
+ goto err;
+ ti = ipvs->backup_tinfo;
+
+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+ retc = 0;
+ for (id = ipvs->threads_mask; id >= 0; id--) {
+ int ret;
+
+ tinfo = &ti[id];
+ pr_info("stopping backup sync thread %d ...\n",
+ task_pid_nr(tinfo->task));
+ ret = kthread_stop(tinfo->task);
+ if (retc >= 0)
+ retc = ret;
+ }
+ ipvs->backup_tinfo = NULL;
+ } else {
+ goto err;
+ }
+ id = ipvs->threads_mask;
+ mutex_unlock(&ipvs->sync_mutex);
+
+ /* No more mutexes, release socks */
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ return retc;
+
+err:
+ mutex_unlock(&ipvs->sync_mutex);
+ return retc;
+}
+
+/*
+ * Initialize data struct for each netns
+ */
+int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
+{
+ __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
+ spin_lock_init(&ipvs->sync_lock);
+ spin_lock_init(&ipvs->sync_buff_lock);
+ return 0;
+}
+
+void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
+{
+ int retc;
+
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
+ if (retc && retc != -ESRCH)
+ pr_err("Failed to stop Master Daemon\n");
+
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
+ if (retc && retc != -ESRCH)
+ pr_err("Failed to stop Backup Daemon\n");
+}
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
new file mode 100644
index 000000000..f2579fc9c
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* IPVS: Power of Twos Choice Scheduling module
+ *
+ * Authors: Darby Payne <darby.payne@applovin.com>
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+
+#include <net/ip_vs.h>
+
+ * Power of two choices scheduling, an algorithm originally described by
+ * Michael Mitzenmacher.
+ *
+ * Randomly picks two destinations and selects the one with the
+ * fewest connections.
+ *
+ * The algorithm calculates a few variables
+ * - total_weight = sum of all weights
+ * - rweight1 = random number between [0,total_weight]
+ * - rweight2 = random number between [0,total_weight]
+ *
+ * For each destination
+ * decrement rweight1 and rweight2 by the destination weight
+ * pick choice1 when rweight1 is <= 0
+ * pick choice2 when rweight2 is <= 0
+ *
+ * Return choice2 if choice2 has fewer connections than choice1,
+ * normalized by weight.
+ *
+ * References
+ * ----------
+ *
+ * [Mitzenmacher 2016]
+ * The Power of Two Random Choices: A Survey of Techniques and Results
+ * Michael Mitzenmacher, Andrea W. Richa, and Ramesh Sitaraman
+ * http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/twosurvey.pdf
+ *
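+ * Worked example: three dests with weights {3, 2, 1} give
+ * total_weight = 6 (7 after the +1 in the code). If rweight1 = 4, the
+ * walk leaves 1 then -1, picking dest2; if rweight2 = 6, it leaves
+ * 3, 1, 0, picking dest3. dest3 then wins only if
+ * weight2 * overhead1 > weight1 * overhead2.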
+ */
+static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc,
+ const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *choice1 = NULL, *choice2 = NULL;
+ int rweight1, rweight2, weight1 = -1, weight2 = -1, overhead1 = 0;
+ int overhead2, total_weight = 0, weight;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+	/* Sum the weights and remember a fallback destination (choice1) */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD)) {
+ weight = atomic_read(&dest->weight);
+ if (weight > 0) {
+ total_weight += weight;
+ choice1 = dest;
+ }
+ }
+ }
+
+ if (!choice1) {
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+ }
+
+ /* Add 1 to total_weight so that the random weights are inclusive
+ * from 0 to total_weight
+ */
+ total_weight += 1;
+ rweight1 = prandom_u32_max(total_weight);
+ rweight2 = prandom_u32_max(total_weight);
+
+ /* Pick two weighted servers */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ weight = atomic_read(&dest->weight);
+ if (weight <= 0)
+ continue;
+
+ rweight1 -= weight;
+ rweight2 -= weight;
+
+ if (rweight1 <= 0 && weight1 == -1) {
+ choice1 = dest;
+ weight1 = weight;
+ overhead1 = ip_vs_dest_conn_overhead(dest);
+ }
+
+ if (rweight2 <= 0 && weight2 == -1) {
+ choice2 = dest;
+ weight2 = weight;
+ overhead2 = ip_vs_dest_conn_overhead(dest);
+ }
+
+ if (weight1 != -1 && weight2 != -1)
+ goto nextstage;
+ }
+
+nextstage:
+ if (choice2 && (weight2 * overhead1) > (weight1 * overhead2))
+ choice1 = choice2;
+
+ IP_VS_DBG_BUF(6, "twos: server %s:%u conns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(choice1->af, &choice1->addr),
+ ntohs(choice1->port), atomic_read(&choice1->activeconns),
+ refcount_read(&choice1->refcnt),
+ atomic_read(&choice1->weight));
+
+ return choice1;
+}
+
+static struct ip_vs_scheduler ip_vs_twos_scheduler = {
+ .name = "twos",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_twos_scheduler.n_list),
+ .schedule = ip_vs_twos_schedule,
+};
+
+static int __init ip_vs_twos_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_twos_scheduler);
+}
+
+static void __exit ip_vs_twos_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_twos_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_twos_init);
+module_exit(ip_vs_twos_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
new file mode 100644
index 000000000..09f584b56
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Weighted Least-Connection Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * Changes:
+ * Wensong Zhang : changed the ip_vs_wlc_schedule to return dest
+ * Wensong Zhang : changed to use the inactconns in scheduling
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_wlc_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (dest overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connections.
+ */
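+
+ /* Illustrative integer check: least with overhead 10, weight 2
+ * (load 5) vs. a dest with overhead 12, weight 3 (load 4);
+ * 10 * 3 > 12 * 2 (30 > 24) promotes the second dest without
+ * any floating point.
+ */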
+
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = ip_vs_dest_conn_overhead(least);
+ goto nextstage;
+ }
+ }
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+
+ /*
+ * Find the destination with the least load.
+ */
+ nextstage:
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+ doh = ip_vs_dest_conn_overhead(dest);
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
+ atomic_read(&least->activeconns),
+ refcount_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlc_scheduler = {
+ .name = "wlc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+ .schedule = ip_vs_wlc_schedule,
+};
+
+
+static int __init ip_vs_wlc_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
new file mode 100644
index 000000000..1bc7a0789
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IPVS: Weighted Round-Robin Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ * Wensong Zhang : changed the ip_vs_wrr_schedule to return dest
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_wrr_update_svc
+ * Julian Anastasov : fixed the bug of returning destination
+ * with weight 0 when all weights are zero
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/gcd.h>
+
+#include <net/ip_vs.h>
+
+/* The WRR algorithm depends on some calculations:
+ * - mw: maximum weight
+ * - di: weight step, the greatest common divisor of all weights
+ * - cw: current required weight
+ * As a result, all weights are in the [di..mw] range with a step of di.
+ *
+ * First, we start with cw = mw and select dests with weight >= cw.
+ * Then cw is reduced with di and all dests are checked again.
+ * Last pass should be with cw = di. We have mw/di passes in total:
+ *
+ * pass 1: cw = max weight
+ * pass 2: cw = max weight - di
+ * pass 3: cw = max weight - 2 * di
+ * ...
+ * last pass: cw = di
+ *
+ * Weights are supposed to be >= di, but since we run in parallel
+ * with weight changes it is possible for some dest weight to drop
+ * below di, which is bad if it is the only available dest.
+ *
+ * So, we modify how mw is calculated: it is now reduced by (di - 1),
+ * so that the last cw is 1, to catch such dests with weight below di:
+ * pass 1: cw = max weight - (di - 1)
+ * pass 2: cw = max weight - di - (di - 1)
+ * pass 3: cw = max weight - 2 * di - (di - 1)
+ * ...
+ * last pass: cw = 1
+ *
+ */
+
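+/* Worked example (illustrative): dests A (weight 4) and B (weight 2)
+ * give di = gcd(4, 2) = 2 and mw = 4 - (2 - 1) = 3. The passes then
+ * use cw = 3 (only A qualifies) and cw = 1 (both qualify), yielding
+ * the sequence A, A, B per cycle -- a 2:1 ratio that matches the 4:2
+ * weights.
+ */
+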
+/*
+ * current destination pointer for weighted round-robin scheduling
+ */
+struct ip_vs_wrr_mark {
+ struct ip_vs_dest *cl; /* current dest or head */
+ int cw; /* current weight */
+ int mw; /* maximum weight */
+ int di; /* decreasing interval */
+ struct rcu_head rcu_head;
+};
+
+
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest;
+ int weight;
+ int g = 0;
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ weight = atomic_read(&dest->weight);
+ if (weight > 0) {
+ if (g > 0)
+ g = gcd(weight, g);
+ else
+ g = weight;
+ }
+ }
+ return g ? g : 1;
+}
+
+
+/*
+ * Get the maximum weight of the service destinations.
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest;
+ int new_weight, weight = 0;
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ new_weight = atomic_read(&dest->weight);
+ if (new_weight > weight)
+ weight = new_weight;
+ }
+
+ return weight;
+}
+
+
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_wrr_mark *mark;
+
+ /*
+ * Allocate the mark variable for WRR scheduling
+ */
+ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);
+ if (mark == NULL)
+ return -ENOMEM;
+
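+ /* cl starts at the list head (cast to a dest) so that the first
+ * list_for_each_entry_continue pass begins at the first real dest.
+ */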
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
+ mark->di = ip_vs_wrr_gcd_weight(svc);
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ mark->cw = mark->mw;
+ svc->sched_data = mark;
+
+ return 0;
+}
+
+
+static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+ /*
+ * Release the mark variable
+ */
+ kfree_rcu(mark, rcu_head);
+}
+
+
+static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+ spin_lock_bh(&svc->sched_lock);
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
+ mark->di = ip_vs_wrr_gcd_weight(svc);
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ if (mark->cw > mark->mw || !mark->cw)
+ mark->cw = mark->mw;
+ else if (mark->di > 1)
+ mark->cw = (mark->cw / mark->di) * mark->di + 1;
+ spin_unlock_bh(&svc->sched_lock);
+ return 0;
+}
+
+
+/*
+ * Weighted Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *last, *stop = NULL;
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+ bool last_pass = false, restarted = false;
+
+ IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+ spin_lock_bh(&svc->sched_lock);
+ dest = mark->cl;
+ /* No available dests? */
+ if (mark->mw == 0)
+ goto err_noavail;
+ last = dest;
+ /* Stop only after all dests were checked for weight >= 1 (last pass) */
+ while (1) {
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) >= mark->cw)
+ goto found;
+ if (dest == stop)
+ goto err_over;
+ }
+ mark->cw -= mark->di;
+ if (mark->cw <= 0) {
+ mark->cw = mark->mw;
+ /* Stop if we tried last pass from first dest:
+ * 1. last_pass: we started checks when cw > di but
+ * then all dests were checked for w >= 1
+ * 2. last was head: the first and only traversal
+ * was for weight >= 1, for all dests.
+ */
+ if (last_pass ||
+ &last->n_list == &svc->destinations)
+ goto err_over;
+ restarted = true;
+ }
+ last_pass = mark->cw <= mark->di;
+ if (last_pass && restarted &&
+ &last->n_list != &svc->destinations) {
+ /* First traversal was for w >= 1 but only
+ * for dests after 'last', now do the same
+ * for all dests up to 'last'.
+ */
+ stop = last;
+ }
+ }
+
+found:
+ IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->activeconns),
+ refcount_read(&dest->refcnt),
+ atomic_read(&dest->weight));
+ mark->cl = dest;
+
+ out:
+ spin_unlock_bh(&svc->sched_lock);
+ return dest;
+
+err_noavail:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available");
+ goto out;
+
+err_over:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available: "
+ "all destinations are overloaded");
+ goto out;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+ .name = "wrr",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+ .init_service = ip_vs_wrr_init_svc,
+ .done_service = ip_vs_wrr_done_svc,
+ .add_dest = ip_vs_wrr_dest_changed,
+ .del_dest = ip_vs_wrr_dest_changed,
+ .upd_dest = ip_vs_wrr_dest_changed,
+ .schedule = ip_vs_wrr_schedule,
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
new file mode 100644
index 000000000..d40a4ca2b
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -0,0 +1,1686 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ip_vs_xmit.c: various packet transmitters for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * Changes:
+ *
+ * Description of forwarding methods:
+ * - all transmitters are called from LOCAL_IN (remote clients) and
+ * LOCAL_OUT (local clients), but for ICMP they can be called from FORWARD
+ * - not all connections have a destination server, for example,
+ * connections in the backup server when fwmark is used
+ * - bypass connections use the daddr from the packet
+ * - we can use dst without a ref while sending inside an RCU section; we
+ * take a ref when returning NF_ACCEPT for a NAT-ed packet via loopback
+ * LOCAL_OUT rules:
+ * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
+ * - skb->pkt_type is not set yet
+ * - the only place where we can see skb->sk != NULL
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/tcp.h> /* for tcphdr */
+#include <net/ip.h>
+#include <net/gue.h>
+#include <net/gre.h>
+#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h> /* for icmp_send */
+#include <net/route.h> /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
+#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+enum {
+ IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
+ IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
+ IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
+ * local
+ */
+ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
+ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
+ IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */
+};
+
+static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
+{
+ return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
+}
+
+static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
+{
+ kfree(dest_dst);
+}
+
+/*
+ * Destination cache to speed up outgoing route lookup
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
+ struct dst_entry *dst, u32 dst_cookie)
+{
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst,
+ lockdep_is_held(&dest->dst_lock));
+
+ if (dest_dst) {
+ dest_dst->dst_cache = dst;
+ dest_dst->dst_cookie = dst_cookie;
+ }
+ rcu_assign_pointer(dest->dest_dst, dest_dst);
+
+ if (old)
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
+}
+
+static inline struct ip_vs_dest_dst *
+__ip_vs_dst_check(struct ip_vs_dest *dest)
+{
+ struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
+ struct dst_entry *dst;
+
+ if (!dest_dst)
+ return NULL;
+ dst = dest_dst->dst_cache;
+ if (dst->obsolete &&
+ dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
+ return NULL;
+ return dest_dst;
+}
+
+static inline bool
+__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
+{
+ if (IP6CB(skb)->frag_max_size) {
+ /* frag_max_size tells us that this packet has been
+ * defragmented by the netfilter IPv6 conntrack module.
+ */
+ if (IP6CB(skb)->frag_max_size > mtu)
+ return true; /* largest fragment violates MTU */
+ } else if (skb->len > mtu && !skb_is_gso(skb)) {
+ return true; /* packet size violates MTU */
+ }
+ return false;
+}
+
+/* Get route to daddr, update *saddr, optionally bind route to saddr */
+static struct rtable *do_output_route4(struct net *net, __be32 daddr,
+ int rt_mode, __be32 *saddr)
+{
+ struct flowi4 fl4;
+ struct rtable *rt;
+ bool loop = false;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = daddr;
+ fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
+ FLOWI_FLAG_KNOWN_NH : 0;
+
+retry:
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt)) {
+ /* Invalid saddr ? */
+ if (PTR_ERR(rt) == -EINVAL && *saddr &&
+ rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
+ *saddr = 0;
+ flowi4_update_output(&fl4, 0, 0, daddr, 0);
+ goto retry;
+ }
+ IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
+ return NULL;
+ } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
+ ip_rt_put(rt);
+ *saddr = fl4.saddr;
+ flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
+ loop = true;
+ goto retry;
+ }
+ *saddr = fl4.saddr;
+ return rt;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+ return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
+}
+#endif
+
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+ int rt_mode,
+ bool new_rt_is_local)
+{
+ bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL);
+ bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+ bool source_is_loopback;
+ bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+ source_is_loopback =
+ (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ (addr_type & IPV6_ADDR_LOOPBACK);
+ old_rt_is_local = __ip_vs_is_local_route6(
+ (struct rt6_info *)skb_dst(skb));
+ } else
+#endif
+ {
+ source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+ old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+ }
+
+ if (unlikely(new_rt_is_local)) {
+ if (!rt_mode_allow_local)
+ return true;
+ if (!rt_mode_allow_redirect && !old_rt_is_local)
+ return true;
+ } else {
+ if (!rt_mode_allow_non_local)
+ return true;
+ if (source_is_loopback)
+ return true;
+ }
+ return false;
+}
+
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk_fullsock(sk))
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
+}
+
+static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+ int rt_mode,
+ struct ip_vs_iphdr *ipvsh,
+ struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct net *net = ipvs->net;
+
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
+ return false;
+ }
+ } else
+#endif
+ {
+ /* If we're going to tunnel the packet and pmtu discovery
+ * is disabled, we'll just fragment it anyway
+ */
+ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+ return true;
+
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len > mtu && !skb_is_gso(skb) &&
+ !ip_vs_iph_icmp(ipvsh))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n",
+ &ip_hdr(skb)->saddr);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static inline bool decrement_ttl(struct netns_ipvs *ipvs,
+ int skb_af,
+ struct sk_buff *skb)
+{
+ struct net *net = ipvs->net;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ /* check and decrement ttl */
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
+ struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
+
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+ ICMPV6_EXC_HOPLIMIT, 0);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
+ return false;
+
+ ipv6_hdr(skb)->hop_limit--;
+ } else
+#endif
+ {
+ if (ip_hdr(skb)->ttl <= 1) {
+ /* Tell the sender its packet died... */
+ IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
+ return false;
+
+ /* Decrease ttl */
+ ip_decrease_ttl(ip_hdr(skb));
+ }
+
+ return true;
+}
+
+/* Get route to destination or remote server */
+static int
+__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
+{
+ struct net *net = ipvs->net;
+ struct ip_vs_dest_dst *dest_dst;
+ struct rtable *rt; /* Route to the other host */
+ int mtu;
+ int local, noref = 1;
+
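+ /* noref=1: the route comes from the per-dest cache and is only
+ * RCU-protected; when dest is NULL we fall back to a refcounted
+ * route and clear noref.
+ */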
+ if (dest) {
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rtable *) dest_dst->dst_cache;
+ else {
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
+ rt = do_output_route4(net, dest->addr.ip, rt_mode,
+ &dest_dst->dst_saddr.ip);
+ if (!rt) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
+ }
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+ &dest->addr.ip, &dest_dst->dst_saddr.ip,
+ atomic_read(&rt->dst.__refcnt));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.ip;
+ } else {
+ __be32 saddr = htonl(INADDR_ANY);
+
+ noref = 0;
+
+ /* For such unconfigured boxes avoid many route lookups
+ * for performance reasons because we do not remember saddr
+ */
+ rt_mode &= ~IP_VS_RT_MODE_CONNECT;
+ rt = do_output_route4(net, daddr, rt_mode, &saddr);
+ if (!rt)
+ goto err_unreach;
+ if (ret_saddr)
+ *ret_saddr = saddr;
+ }
+
+ local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI4\n", &daddr);
+ goto err_put;
+ }
+
+ if (unlikely(local)) {
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ ip_rt_put(rt);
+ return local;
+ }
+
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
+ mtu = dst_mtu(&rt->dst);
+ } else {
+ mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ if (!dest)
+ goto err_put;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
+ if (mtu < 68) {
+ IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
+ goto err_put;
+ }
+ maybe_update_pmtu(skb_af, skb, mtu);
+ }
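+
+ /* Illustrative overhead on a 1500-byte path MTU, assuming no
+ * GUE remote-checksum option: plain IPIP leaves 1480 (20-byte
+ * outer IPv4 header); GUE leaves 1468 (adds an 8-byte UDP and
+ * a 4-byte base GUE header); GRE with checksum leaves 1472
+ * (8-byte GRE header).
+ */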
+
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
+ goto err_put;
+
+ skb_dst_drop(skb);
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ ip_rt_put(rt);
+ return -1;
+
+err_unreach:
+ dst_link_failure(skb);
+ return -1;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static struct dst_entry *
+__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
+ struct in6_addr *ret_saddr, int do_xfrm, int rt_mode)
+{
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .daddr = *daddr,
+ };
+
+ if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error)
+ goto out_err;
+ if (!ret_saddr)
+ return dst;
+ if (ipv6_addr_any(&fl6.saddr) &&
+ ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+ &fl6.daddr, 0, &fl6.saddr) < 0)
+ goto out_err;
+ if (do_xfrm) {
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ dst = NULL;
+ goto out_err;
+ }
+ }
+ *ret_saddr = fl6.saddr;
+ return dst;
+
+out_err:
+ dst_release(dst);
+ IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
+ return NULL;
+}
+
+/*
+ * Get route to destination or remote server
+ */
+static int
+__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
+ struct in6_addr *daddr, struct in6_addr *ret_saddr,
+ struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
+{
+ struct net *net = ipvs->net;
+ struct ip_vs_dest_dst *dest_dst;
+ struct rt6_info *rt; /* Route to the other host */
+ struct dst_entry *dst;
+ int mtu;
+ int local, noref = 1;
+
+ if (dest) {
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rt6_info *) dest_dst->dst_cache;
+ else {
+ u32 cookie;
+
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
+ dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
+ &dest_dst->dst_saddr.in6,
+ do_xfrm, rt_mode);
+ if (!dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
+ }
+ rt = (struct rt6_info *) dst;
+ cookie = rt6_get_cookie(rt);
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
+ &dest->addr.in6, &dest_dst->dst_saddr.in6,
+ atomic_read(&rt->dst.__refcnt));
+ }
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.in6;
+ } else {
+ noref = 0;
+ dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
+ rt_mode);
+ if (!dst)
+ goto err_unreach;
+ rt = (struct rt6_info *) dst;
+ }
+
+ local = __ip_vs_is_local_route6(rt);
+
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI6\n", daddr);
+ goto err_put;
+ }
+
+ if (unlikely(local)) {
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ dst_release(&rt->dst);
+ return local;
+ }
+
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
+ /* MTU checking */
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
+ mtu = dst_mtu(&rt->dst);
+ else {
+ mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+ if (!dest)
+ goto err_put;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
+ if (mtu < IPV6_MIN_MTU) {
+ IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
+ IPV6_MIN_MTU);
+ goto err_put;
+ }
+ maybe_update_pmtu(skb_af, skb, mtu);
+ }
+
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
+ goto err_put;
+
+ skb_dst_drop(skb);
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ dst_release(&rt->dst);
+ return -1;
+
+err_unreach:
+ /* The ip6_link_failure function requires the dev field to be set
+ * in order to get the net (needed further down for fwmark
+ * reflection).
+ */
+ if (!skb->dev)
+ skb->dev = skb_dst(skb)->dev;
+
+ dst_link_failure(skb);
+ return -1;
+}
+#endif
+
+
+/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
+static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
+ struct ip_vs_conn *cp)
+{
+ int ret = NF_ACCEPT;
+
+ skb->ipvs_property = 1;
+ if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
+ ret = ip_vs_confirm_conntrack(skb);
+ if (ret == NF_ACCEPT) {
+ nf_reset_ct(skb);
+ skb_forward_csum(skb);
+ if (skb->dev)
+ skb_clear_tstamp(skb);
+ }
+ return ret;
+}
+
+/* In the event of a remote destination, it's possible that we would have
+ * matches against an old socket (particularly a TIME-WAIT socket). This
+ * causes havoc down the line (ip_local_out et al. expect regular sockets
+ * and invalid memory accesses will happen), so simply drop the association
+ * in this case.
+ */
+static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
+{
+ /* If dev is set, the packet came from the LOCAL_IN callback and
+ * not from a local TCP socket.
+ */
+ if (skb->dev)
+ skb_orphan(skb);
+}
+
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
+
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ else
+ ip_vs_update_conntrack(skb, cp, 1);
+
+ /* Remove the early_demux association unless it's bound for the
+ * exact same port and address on this host after translation.
+ */
+ if (!local || cp->vport != cp->dport ||
+ !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
+ ip_vs_drop_early_demux_sk(skb);
+
+ if (!local) {
+ skb_forward_csum(skb);
+ if (skb->dev)
+ skb_clear_tstamp(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+ } else
+ ret = NF_ACCEPT;
+
+ return ret;
+}
+
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
+
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ if (!local) {
+ ip_vs_drop_early_demux_sk(skb);
+ skb_forward_csum(skb);
+ if (skb->dev)
+ skb_clear_tstamp(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+ } else
+ ret = NF_ACCEPT;
+ return ret;
+}
+
+
+/*
+ * NULL transmitter (do nothing except return NF_ACCEPT)
+ */
+int
+ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ /* we do not touch skb and do not need pskb ptr */
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
+}
+
+
+/*
+ * Bypass transmitter
+ * Lets packets bypass the destination when the destination is not
+ * available; it may only be used in a transparent cache cluster.
+ */
+int
+ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ EnterFunction(10);
+
+ if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
+ goto tx_error;
+
+ ip_send_check(iph);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ EnterFunction(10);
+
+ if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
+ &iph->daddr, NULL,
+ ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
+ goto tx_error;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
+/*
+ * NAT transmitter (only for outside-to-inside NAT forwarding)
+ * Not used for related ICMP
+ */
+int
+ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct rtable *rt; /* Route to the other host */
+ int local, rc, was_input;
+
+ EnterFunction(10);
+
+ /* check if it is a connection of no-client-port */
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ __be16 _pt, *p;
+
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
+ if (p == NULL)
+ goto tx_error;
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+ }
+
+ was_input = rt_is_input_route(skb_rtable(skb));
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
+ /*
+ * Avoid duplicate tuple in reply direction for NAT traffic
+ * to local address when connection is sync-ed
+ */
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct) {
+ IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit(): "
+ "stopping DNAT to local address");
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* From world but DNAT to loopback address? */
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
+ IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit(): stopping DNAT to loopback "
+ "address");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
+ goto tx_error;
+
+ if (skb_cow(skb, rt->dst.dev->hard_header_len))
+ goto tx_error;
+
+ /* mangle the packet */
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
+ goto tx_error;
+ ip_hdr(skb)->daddr = cp->daddr.ip;
+ ip_send_check(ip_hdr(skb));
+
+ IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
+
+ /* FIXME: when the application helper enlarges the packet and the
+ length is larger than the MTU of the outgoing device, there will
+ still be an MTU problem. */
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+
+ LeaveFunction(10);
+ return rc;
+
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int local, rc;
+
+ EnterFunction(10);
+
+ /* check if it is a connection of no-client-port */
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
+ __be16 _pt, *p;
+
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
+ if (p == NULL)
+ goto tx_error;
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+ }
+
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
+ NULL, ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
+ /*
+ * Avoid duplicate tuple in reply direction for NAT traffic
+ * to local address when connection is sync-ed
+ */
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct) {
+ IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit_v6(): "
+ "stopping DNAT to local address");
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* From world but DNAT to loopback address? */
+ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
+ IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit_v6(): "
+ "stopping DNAT to loopback address");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
+ goto tx_error;
+
+ if (skb_cow(skb, rt->dst.dev->hard_header_len))
+ goto tx_error;
+
+ /* mangle the packet */
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
+ goto tx_error;
+ ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+ IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
+
+ /* FIXME: when the application helper enlarges the packet and the
+ length is larger than the MTU of the outgoing device, there will
+ still be an MTU problem. */
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+
+ LeaveFunction(10);
+ return rc;
+
+tx_error:
+ LeaveFunction(10);
+ kfree_skb(skb);
+ return NF_STOLEN;
+}
+#endif
+
+/* When forwarding a packet, we must ensure that we've got enough headroom
+ * for the encapsulation packet in the skb. This also gives us an
+ * opportunity to figure out what the payload_len, dsfield, ttl, and df
+ * values should be, so that we won't need to look at the old IP header
+ * again.
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+ unsigned int max_headroom, __u8 *next_protocol,
+ __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+ __be16 *df)
+{
+ struct sk_buff *new_skb = NULL;
+ struct iphdr *old_iph = NULL;
+ __u8 old_dsfield;
+#ifdef CONFIG_IP_VS_IPV6
+ struct ipv6hdr *old_ipv6h = NULL;
+#endif
+
+ ip_vs_drop_early_demux_sk(skb);
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb)
+ goto error;
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ old_ipv6h = ipv6_hdr(skb);
+ *next_protocol = IPPROTO_IPV6;
+ if (payload_len)
+ *payload_len =
+ ntohs(old_ipv6h->payload_len) +
+ sizeof(*old_ipv6h);
+ old_dsfield = ipv6_get_dsfield(old_ipv6h);
+ *ttl = old_ipv6h->hop_limit;
+ if (df)
+ *df = 0;
+ } else
+#endif
+ {
+ old_iph = ip_hdr(skb);
+ /* Copy DF, reset fragment offset and MF */
+ if (df)
+ *df = (old_iph->frag_off & htons(IP_DF));
+ *next_protocol = IPPROTO_IPIP;
+
+ /* fix old IP header checksum */
+ ip_send_check(old_iph);
+ old_dsfield = ipv4_get_dsfield(old_iph);
+ *ttl = old_iph->ttl;
+ if (payload_len)
+ *payload_len = skb_ip_totlen(skb);
+ }
+
+ /* Implement full-functionality option for ECN encapsulation */
+ *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield);
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+ switch (encaps_af) {
+ case AF_INET:
+ return SKB_GSO_IPXIP4;
+ case AF_INET6:
+ return SKB_GSO_IPXIP6;
+ default:
+ return 0;
+ }
+}
+
+static int
+ipvs_gue_encap(struct net *net, struct sk_buff *skb,
+ struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+ __be16 dport;
+ __be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
+ struct udphdr *udph; /* Our new UDP header */
+ struct guehdr *gueh; /* Our new GUE header */
+ size_t hdrlen, optlen = 0;
+ void *data;
+ bool need_priv = false;
+
+ if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ need_priv = true;
+ }
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
+
+ gueh = (struct guehdr *)skb->data;
+
+ gueh->control = 0;
+ gueh->version = 0;
+ gueh->hlen = optlen >> 2;
+ gueh->flags = 0;
+ gueh->proto_ctype = *next_protocol;
+
+ data = &gueh[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd;
+
+ gueh->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd = data;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ udph = udp_hdr(skb);
+
+ dport = cp->dest->tun_port;
+ udph->dest = dport;
+ udph->source = sport;
+ udph->len = htons(skb->len);
+ udph->check = 0;
+
+ *next_protocol = IPPROTO_UDP;
+
+ return 0;
+}
+
+static void
+ipvs_gre_encap(struct net *net, struct sk_buff *skb,
+ struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+ __be16 proto = *next_protocol == IPPROTO_IPIP ?
+ htons(ETH_P_IP) : htons(ETH_P_IPV6);
+ __be16 tflags = 0;
+ size_t hdrlen;
+
+ if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+
+ hdrlen = gre_calc_hlen(tflags);
+ gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
+
+ *next_protocol = IPPROTO_GRE;
+}
+
+/*
+ * IP Tunneling transmitter
+ *
+ * This function encapsulates the packet in a new IP packet, whose
+ * destination will be set to cp->daddr. Most of the code in this
+ * function is taken from ipip.c.
+ *
+ * It is used in VS/TUN cluster. The load balancer selects a real
+ * server from a cluster based on a scheduling algorithm,
+ * encapsulates the request packet and forwards it to the selected
+ * server. For example, all real servers are configured with
+ * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ * the encapsulated packet, it will decapsulate the packet, processe
+ * the request and return the response packets directly to the client
+ * without passing the load balancer. This can greatly increase the
+ * scalability of virtual server.
+ *
+ * Used for ANY protocol
+ */
+int
+ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
+ struct rtable *rt; /* Route to the other host */
+ __be32 saddr; /* Source for tunnel */
+ struct net_device *tdev; /* Device to other host */
+ __u8 next_protocol = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ __be16 df = 0;
+ __be16 *dfp = NULL;
+ struct iphdr *iph; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ int ret, local;
+ int tun_type, gso_type;
+ int tun_flags;
+
+ EnterFunction(10);
+
+ local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_CONNECT |
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
+ if (local < 0)
+ goto tx_error;
+ if (local)
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
+
+ rt = skb_rtable(skb);
+ tdev = rt->dst.dev;
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+
+ tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
+
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
+
+ max_headroom += gre_hdrlen;
+ }
+
+ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, NULL, &dsfield,
+ &ttl, dfp);
+ if (IS_ERR(skb))
+ goto tx_error;
+
+ gso_type = __tun_gso_type_mask(AF_INET, cp->af);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
+
+ if (iptunnel_handle_offloads(skb, gso_type))
+ goto tx_error;
+
+ skb->transport_header = skb->network_header;
+
+ skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+ /*
+ * Push down and install the IPIP header.
+ */
+ iph = ip_hdr(skb);
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr)>>2;
+ iph->frag_off = df;
+ iph->protocol = next_protocol;
+ iph->tos = dsfield;
+ iph->daddr = cp->daddr.ip;
+ iph->saddr = saddr;
+ iph->ttl = ttl;
+ ip_select_ident(net, skb, NULL);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
+ if (ret == NF_ACCEPT)
+ ip_local_out(net, skb->sk, skb);
+ else if (ret == NF_DROP)
+ kfree_skb(skb);
+
+ LeaveFunction(10);
+
+ return NF_STOLEN;
+
+ tx_error:
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
+ struct rt6_info *rt; /* Route to the other host */
+ struct in6_addr saddr; /* Source for tunnel */
+ struct net_device *tdev; /* Device to other host */
+ __u8 next_protocol = 0;
+ __u32 payload_len = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ struct ipv6hdr *iph; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ int ret, local;
+ int tun_type, gso_type;
+ int tun_flags;
+
+ EnterFunction(10);
+
+ local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
+ &saddr, ipvsh, 1,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_TUNNEL);
+ if (local < 0)
+ goto tx_error;
+ if (local)
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
+
+ rt = (struct rt6_info *) skb_dst(skb);
+ tdev = rt->dst.dev;
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+ tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
+
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
+
+ max_headroom += gre_hdrlen;
+ }
+
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, &payload_len,
+ &dsfield, &ttl, NULL);
+ if (IS_ERR(skb))
+ goto tx_error;
+
+ gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
+
+ if (iptunnel_handle_offloads(skb, gso_type))
+ goto tx_error;
+
+ skb->transport_header = skb->network_header;
+
+ skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+ /*
+ * Push down and install the outer IPv6 header.
+ */
+ iph = ipv6_hdr(skb);
+ iph->version = 6;
+ iph->nexthdr = next_protocol;
+ iph->payload_len = htons(payload_len);
+ memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+ ipv6_change_dsfield(iph, 0, dsfield);
+ iph->daddr = cp->daddr.in6;
+ iph->saddr = saddr;
+ iph->hop_limit = ttl;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
+ if (ret == NF_ACCEPT)
+ ip6_local_out(net, skb->sk, skb);
+ else if (ret == NF_DROP)
+ kfree_skb(skb);
+
+ LeaveFunction(10);
+
+ return NF_STOLEN;
+
+tx_error:
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
+
+/*
+ * Direct Routing transmitter
+ * Used for ANY protocol
+ */
+int
+ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ int local;
+
+ EnterFunction(10);
+
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
+ if (local < 0)
+ goto tx_error;
+ if (local)
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
+
+ ip_send_check(ip_hdr(skb));
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
+{
+ int local;
+
+ EnterFunction(10);
+
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
+ NULL, ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH);
+ if (local < 0)
+ goto tx_error;
+ if (local)
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
+
+/*
+ * ICMP packet transmitter
+ * called by the ip_vs_in_icmp
+ */
+int
+ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
+{
+ struct rtable *rt; /* Route to the other host */
+ int rc;
+ int local;
+ int rt_mode, was_input;
+
+ EnterFunction(10);
+
+ /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+ forwarded directly here, because there is no need to
+ translate address/port back */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ if (cp->packet_xmit)
+ rc = cp->packet_xmit(skb, cp, pp, iph);
+ else
+ rc = NF_ACCEPT;
+ /* do not touch skb anymore */
+ atomic_inc(&cp->in_pkts);
+ goto out;
+ }
+
+ /*
+ * mangle and send the packet here (only for VS/NAT)
+ */
+ was_input = rt_is_input_route(skb_rtable(skb));
+
+ /* LOCALNODE from FORWARD hook is not supported */
+ rt_mode = (hooknum != NF_INET_FORWARD) ?
+ IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ NULL, iph);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
+
+ /*
+ * Avoid duplicate tuple in reply direction for NAT traffic
+ * to local address when connection is sync-ed
+ */
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct) {
+ IP_VS_DBG(10, "%s(): "
+ "stopping DNAT to local address %pI4\n",
+ __func__, &cp->daddr.ip);
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* From world but DNAT to loopback address? */
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
+ IP_VS_DBG(1, "%s(): "
+ "stopping DNAT to loopback %pI4\n",
+ __func__, &cp->daddr.ip);
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (skb_ensure_writable(skb, offset))
+ goto tx_error;
+
+ if (skb_cow(skb, rt->dst.dev->hard_header_len))
+ goto tx_error;
+
+ ip_vs_nat_icmp(skb, pp, cp, 0);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+ goto out;
+
+ tx_error:
+ kfree_skb(skb);
+ rc = NF_STOLEN;
+ out:
+ LeaveFunction(10);
+ return rc;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
+ struct ip_vs_iphdr *ipvsh)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int rc;
+ int local;
+ int rt_mode;
+
+ EnterFunction(10);
+
+ /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+ forwarded directly here, because there is no need to
+ translate address/port back */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ if (cp->packet_xmit)
+ rc = cp->packet_xmit(skb, cp, pp, ipvsh);
+ else
+ rc = NF_ACCEPT;
+ /* do not touch skb anymore */
+ atomic_inc(&cp->in_pkts);
+ goto out;
+ }
+
+ /*
+ * mangle and send the packet here (only for VS/NAT)
+ */
+
+ /* LOCALNODE from FORWARD hook is not supported */
+ rt_mode = (hooknum != NF_INET_FORWARD) ?
+ IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
+ /*
+ * Avoid duplicate tuple in reply direction for NAT traffic
+ * to local address when connection is sync-ed
+ */
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct) {
+ IP_VS_DBG(10, "%s(): "
+ "stopping DNAT to local address %pI6\n",
+ __func__, &cp->daddr.in6);
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* From world but DNAT to loopback address? */
+ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
+ IP_VS_DBG(1, "%s(): "
+ "stopping DNAT to loopback %pI6\n",
+ __func__, &cp->daddr.in6);
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (skb_ensure_writable(skb, offset))
+ goto tx_error;
+
+ if (skb_cow(skb, rt->dst.dev->hard_header_len))
+ goto tx_error;
+
+ ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->ignore_df = 1;
+
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+ goto out;
+
+tx_error:
+ kfree_skb(skb);
+ rc = NF_STOLEN;
+out:
+ LeaveFunction(10);
+ return rc;
+}
+#endif
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
new file mode 100644
index 000000000..5d8ed6c90
--- /dev/null
+++ b/net/netfilter/nf_conncount.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * count the number of connections matching an arbitrary key.
+ *
+ * (C) 2017 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * split from xt_connlimit.c:
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ * (C) CC Computer Consultants GmbH, 2007
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#define CONNCOUNT_SLOTS 256U
+
+#define CONNCOUNT_GC_MAX_NODES 8
+#define MAX_KEYLEN 5
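+
+/* A key is an array of up to MAX_KEYLEN u32 words (20 bytes); an IPv6
+ * address, for example, occupies 4 of those words.
+ */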
+
+/* we will save the tuples of all connections we care about */
+struct nf_conncount_tuple {
+ struct list_head node;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone zone;
+ int cpu;
+ u32 jiffies32;
+};
+
+struct nf_conncount_rb {
+ struct rb_node node;
+ struct nf_conncount_list list;
+ u32 key[MAX_KEYLEN];
+ struct rcu_head rcu_head;
+};
+
+static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
+
+struct nf_conncount_data {
+ unsigned int keylen;
+ struct rb_root root[CONNCOUNT_SLOTS];
+ struct net *net;
+ struct work_struct gc_work;
+ unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+ unsigned int gc_tree;
+};
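+
+/* Each key hashes into one of the CONNCOUNT_SLOTS rb-trees in ->root,
+ * protected by the matching lock in nf_conncount_locks, so lookups on
+ * unrelated keys do not contend.
+ */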
+
+static u_int32_t conncount_rnd __read_mostly;
+static struct kmem_cache *conncount_rb_cachep __read_mostly;
+static struct kmem_cache *conncount_conn_cachep __read_mostly;
+
+static inline bool already_closed(const struct nf_conn *conn)
+{
+ if (nf_ct_protonum(conn) == IPPROTO_TCP)
+ return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
+ conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
+ else
+ return false;
+}
+
+static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
+{
+ return memcmp(a, b, klen * sizeof(u32));
+}
+
+static void conn_free(struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn)
+{
+ lockdep_assert_held(&list->list_lock);
+
+ list->count--;
+ list_del(&conn->node);
+
+ kmem_cache_free(conncount_conn_cachep, conn);
+}
+
+static const struct nf_conntrack_tuple_hash *
+find_or_evict(struct net *net, struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ unsigned long a, b;
+ int cpu = raw_smp_processor_id();
+ u32 age;
+
+ found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+ if (found)
+ return found;
+ b = conn->jiffies32;
+ a = (u32)jiffies;
+
+	/* conn might have been added just before by another cpu and
+	 * might still be unconfirmed. In this case, nf_conntrack_find_get()
+	 * returns no result. Thus only evict if this cpu added the
+	 * stale entry or if the entry is older than two jiffies.
+	 */
+ age = a - b;
+ if (conn->cpu == cpu || age >= 2) {
+ conn_free(list, conn);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
+static int __nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conn *found_ct;
+ unsigned int collect = 0;
+
+ if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
+ goto add_new_node;
+
+ /* check the saved connections */
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ if (collect > CONNCOUNT_GC_MAX_NODES)
+ break;
+
+ found = find_or_evict(net, list, conn);
+ if (IS_ERR(found)) {
+ /* Not found, but might be about to be confirmed */
+ if (PTR_ERR(found) == -EAGAIN) {
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+ nf_ct_zone_id(zone, zone->dir))
+ return 0; /* already exists */
+ } else {
+ collect++;
+ }
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_equal(found_ct, zone, zone->dir)) {
+ /*
+ * We should not see tuples twice unless someone hooks
+ * this into a table without "-p tcp --syn".
+ *
+ * Attempt to avoid a re-add in this case.
+ */
+ nf_ct_put(found_ct);
+ return 0;
+ } else if (already_closed(found_ct)) {
+			/*
+			 * we do not care about connections that are
+			 * already closed -> ditch this one
+			 */
+ nf_ct_put(found_ct);
+ conn_free(list, conn);
+ collect++;
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ }
+
+add_new_node:
+ if (WARN_ON_ONCE(list->count > INT_MAX))
+ return -EOVERFLOW;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return -ENOMEM;
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
+ list_add_tail(&conn->node, &list->head);
+ list->count++;
+ list->last_gc = (u32)jiffies;
+ return 0;
+}
+
+int nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ /* check the saved connections */
+ spin_lock_bh(&list->list_lock);
+ ret = __nf_conncount_add(net, list, tuple, zone);
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
+
+void nf_conncount_list_init(struct nf_conncount_list *list)
+{
+ spin_lock_init(&list->list_lock);
+ INIT_LIST_HEAD(&list->head);
+ list->count = 0;
+ list->last_gc = (u32)jiffies;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+/* Return true if the list is empty. Must be called with BH disabled. */
+bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conn *found_ct;
+ unsigned int collected = 0;
+ bool ret = false;
+
+ /* don't bother if we just did GC */
+ if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
+ return false;
+
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock(&list->list_lock))
+ return false;
+
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ found = find_or_evict(net, list, conn);
+ if (IS_ERR(found)) {
+ if (PTR_ERR(found) == -ENOENT)
+ collected++;
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+ if (already_closed(found_ct)) {
+			/*
+			 * we do not care about connections that are
+			 * already closed -> ditch this one
+			 */
+ nf_ct_put(found_ct);
+ conn_free(list, conn);
+ collected++;
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ if (collected > CONNCOUNT_GC_MAX_NODES)
+ break;
+ }
+
+ if (!list->count)
+ ret = true;
+ list->last_gc = (u32)jiffies;
+ spin_unlock(&list->list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
+
+static void __tree_nodes_free(struct rcu_head *h)
+{
+ struct nf_conncount_rb *rbconn;
+
+ rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
+/* caller must hold the nf_conncount_locks[] slot lock for this tree */
+static void tree_nodes_free(struct rb_root *root,
+ struct nf_conncount_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct nf_conncount_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ spin_lock(&rbconn->list.list_lock);
+ if (!rbconn->list.count) {
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ }
+ spin_unlock(&rbconn->list.list_lock);
+ }
+}
+
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+ set_bit(tree, data->pending_trees);
+ schedule_work(&data->gc_work);
+}
+
+static unsigned int
+insert_tree(struct net *net,
+ struct nf_conncount_data *data,
+ struct rb_root *root,
+ unsigned int hash,
+ const u32 *key,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct nf_conncount_rb *rbconn;
+ struct nf_conncount_tuple *conn;
+ unsigned int count = 0, gc_count = 0;
+ u8 keylen = data->keylen;
+ bool do_gc = true;
+
+ spin_lock_bh(&nf_conncount_locks[hash]);
+restart:
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
+
+ parent = *rbnode;
+ diff = key_diff(key, rbconn->key, keylen);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ int ret;
+
+ ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+ if (ret)
+ count = 0; /* hotdrop */
+ else
+ count = rbconn->list.count;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ goto out_unlock;
+ }
+
+ if (gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+ schedule_gc_worker(data, hash);
+ gc_count = 0;
+ do_gc = false;
+ goto restart;
+ }
+
+	/* no match found in the tree: insert a new node for this key */
+ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ goto out_unlock;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ goto out_unlock;
+ }
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
+ memcpy(rbconn->key, key, sizeof(u32) * keylen);
+
+ nf_conncount_list_init(&rbconn->list);
+ list_add(&conn->node, &rbconn->list.head);
+ count = 1;
+ rbconn->list.count = count;
+
+ rb_link_node_rcu(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+out_unlock:
+ spin_unlock_bh(&nf_conncount_locks[hash]);
+ return count;
+}
+
+static unsigned int
+count_tree(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct rb_root *root;
+ struct rb_node *parent;
+ struct nf_conncount_rb *rbconn;
+ unsigned int hash;
+ u8 keylen = data->keylen;
+
+ hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
+ root = &data->root[hash];
+
+ parent = rcu_dereference_raw(root->rb_node);
+ while (parent) {
+ int diff;
+
+ rbconn = rb_entry(parent, struct nf_conncount_rb, node);
+
+ diff = key_diff(key, rbconn->key, keylen);
+ if (diff < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ } else if (diff > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+ int ret;
+
+ if (!tuple) {
+ nf_conncount_gc_list(net, &rbconn->list);
+ return rbconn->list.count;
+ }
+
+ spin_lock_bh(&rbconn->list.list_lock);
+			/* Node might be about to be freed.
+			 * We need to defer to insert_tree() in this case.
+			 */
+ if (rbconn->list.count == 0) {
+ spin_unlock_bh(&rbconn->list.list_lock);
+ break;
+ }
+
+ /* same source network -> be counted! */
+ ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+ spin_unlock_bh(&rbconn->list.list_lock);
+ if (ret)
+ return 0; /* hotdrop */
+ else
+ return rbconn->list.count;
+ }
+ }
+
+ if (!tuple)
+ return 0;
+
+ return insert_tree(net, data, root, hash, key, tuple, zone);
+}
+
+static void tree_gc_worker(struct work_struct *work)
+{
+ struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+ struct rb_root *root;
+ struct rb_node *node;
+ unsigned int tree, next_tree, gc_count = 0;
+
+ tree = data->gc_tree % CONNCOUNT_SLOTS;
+ root = &data->root[tree];
+
+ local_bh_disable();
+ rcu_read_lock();
+ for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ if (nf_conncount_gc_list(data->net, &rbconn->list))
+ gc_count++;
+ }
+ rcu_read_unlock();
+ local_bh_enable();
+
+ cond_resched();
+
+ spin_lock_bh(&nf_conncount_locks[tree]);
+ if (gc_count < ARRAY_SIZE(gc_nodes))
+ goto next; /* do not bother */
+
+ gc_count = 0;
+ node = rb_first(root);
+ while (node != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ node = rb_next(node);
+
+ if (rbconn->list.count > 0)
+ continue;
+
+ gc_nodes[gc_count++] = rbconn;
+ if (gc_count >= ARRAY_SIZE(gc_nodes)) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+ gc_count = 0;
+ }
+ }
+
+ tree_nodes_free(root, gc_nodes, gc_count);
+next:
+ clear_bit(tree, data->pending_trees);
+
+ next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+ next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
+
+ if (next_tree < CONNCOUNT_SLOTS) {
+ data->gc_tree = next_tree;
+ schedule_work(work);
+ }
+
+ spin_unlock_bh(&nf_conncount_locks[tree]);
+}
+
+/* Count and return the number of conntrack entries in 'net' matching 'key'.
+ * If 'tuple' is not NULL, also insert it into the accounting data structure.
+ * Must be called with the RCU read lock held.
+ */
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ return count_tree(net, data, key, tuple, zone);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_count);
+
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+ unsigned int keylen)
+{
+ struct nf_conncount_data *data;
+ int ret, i;
+
+ if (keylen % sizeof(u32) ||
+ keylen / sizeof(u32) > MAX_KEYLEN ||
+ keylen == 0)
+ return ERR_PTR(-EINVAL);
+
+ net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ ret = nf_ct_netns_get(net, family);
+ if (ret < 0) {
+ kfree(data);
+ return ERR_PTR(ret);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ data->root[i] = RB_ROOT;
+
+ data->keylen = keylen / sizeof(u32);
+ data->net = net;
+ INIT_WORK(&data->gc_work, tree_gc_worker);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_init);
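
A minimal sketch of how a caller might drive this API end to end, loosely
modeled on the xt_connlimit use case; the /24 key derivation, the limit of
16, and every identifier prefixed demo_ are illustrative assumptions, not
part of this file:

/* Hypothetical conncount user: limit connections per source /24. */
static struct nf_conncount_data *demo_data;

static int demo_setup(struct net *net)
{
	/* keylen is in bytes and must be a multiple of sizeof(u32) */
	demo_data = nf_conncount_init(net, NFPROTO_IPV4, sizeof(u32));
	return PTR_ERR_OR_ZERO(demo_data);
}

static bool demo_over_limit(struct net *net, __be32 saddr,
			    const struct nf_conntrack_tuple *tuple,
			    const struct nf_conntrack_zone *zone)
{
	u32 key[1] = { (__force u32)(saddr & htonl(0xffffff00)) };

	/* counts matching entries and also inserts 'tuple'; the
	 * netfilter hook path already holds the RCU read lock
	 */
	return nf_conncount_count(net, demo_data, key, tuple, zone) > 16;
}

static void demo_teardown(struct net *net)
{
	nf_conncount_destroy(net, NFPROTO_IPV4, demo_data);
}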
+
+void nf_conncount_cache_free(struct nf_conncount_list *list)
+{
+ struct nf_conncount_tuple *conn, *conn_n;
+
+ list_for_each_entry_safe(conn, conn_n, &list->head, node)
+ kmem_cache_free(conncount_conn_cachep, conn);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
+
+static void destroy_tree(struct rb_root *r)
+{
+ struct nf_conncount_rb *rbconn;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+
+ rb_erase(node, r);
+
+ nf_conncount_cache_free(&rbconn->list);
+
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+}
+
+void nf_conncount_destroy(struct net *net, unsigned int family,
+ struct nf_conncount_data *data)
+{
+ unsigned int i;
+
+ cancel_work_sync(&data->gc_work);
+ nf_ct_netns_put(net, family);
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ destroy_tree(&data->root[i]);
+
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_destroy);
+
+static int __init nf_conncount_modinit(void)
+{
+ int i;
+
+ for (i = 0; i < CONNCOUNT_SLOTS; ++i)
+ spin_lock_init(&nf_conncount_locks[i]);
+
+ conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
+ sizeof(struct nf_conncount_tuple),
+ 0, 0, NULL);
+ if (!conncount_conn_cachep)
+ return -ENOMEM;
+
+ conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
+ sizeof(struct nf_conncount_rb),
+ 0, 0, NULL);
+ if (!conncount_rb_cachep) {
+ kmem_cache_destroy(conncount_conn_cachep);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __exit nf_conncount_modexit(void)
+{
+ kmem_cache_destroy(conncount_conn_cachep);
+ kmem_cache_destroy(conncount_rb_cachep);
+}
+
+module_init(nf_conncount_modinit);
+module_exit(nf_conncount_modexit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
new file mode 100644
index 000000000..385a5f458
--- /dev/null
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Accounting handling for netfilter. */
+
+/*
+ * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/export.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+static bool nf_ct_acct __read_mostly;
+
+module_param_named(acct, nf_ct_acct, bool, 0644);
+MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
+
+void nf_conntrack_acct_pernet_init(struct net *net)
+{
+ net->ct.sysctl_acct = nf_ct_acct;
+}
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
new file mode 100644
index 000000000..d011d2eb0
--- /dev/null
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Amanda extension for IP connection tracking
+ *
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on HW's ip_conntrack_irc.c as well as other modules
+ * (C) 2006 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/textsearch.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+#include <linux/gfp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+static unsigned int master_timeout __read_mostly = 300;
+static char *ts_algo = "kmp";
+
+#define HELPER_NAME "amanda"
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda connection tracking module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_amanda");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+module_param(master_timeout, uint, 0600);
+MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
+module_param(ts_algo, charp, 0400);
+MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
+
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_amanda_hook);
+
+enum amanda_strings {
+ SEARCH_CONNECT,
+ SEARCH_NEWLINE,
+ SEARCH_DATA,
+ SEARCH_MESG,
+ SEARCH_INDEX,
+ SEARCH_STATE,
+};
+
+static struct {
+ const char *string;
+ size_t len;
+ struct ts_config *ts;
+} search[] __read_mostly = {
+ [SEARCH_CONNECT] = {
+ .string = "CONNECT ",
+ .len = 8,
+ },
+ [SEARCH_NEWLINE] = {
+ .string = "\n",
+ .len = 1,
+ },
+ [SEARCH_DATA] = {
+ .string = "DATA ",
+ .len = 5,
+ },
+ [SEARCH_MESG] = {
+ .string = "MESG ",
+ .len = 5,
+ },
+ [SEARCH_INDEX] = {
+ .string = "INDEX ",
+ .len = 6,
+ },
+ [SEARCH_STATE] = {
+ .string = "STATE ",
+ .len = 6,
+ },
+};
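
The patterns above are compiled with textsearch_prepare() and then matched
against skb data via skb_find_text() in amanda_help() below. For reference,
a minimal, self-contained sketch of the same textsearch API on a flat
buffer (the pattern, buffer, and demo_ function name are illustrative only):

/* Illustrative textsearch usage on a linear buffer. */
static int demo_textsearch(void)
{
	static const char buf[] = "CONNECT 10080\n";
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	conf = textsearch_prepare("kmp", "CONNECT ", 8, GFP_KERNEL,
				  TS_AUTOLOAD);
	if (IS_ERR(conf))
		return PTR_ERR(conf);

	/* offset of the first match, or UINT_MAX if not found */
	pos = textsearch_find_continuous(conf, &state, buf, sizeof(buf) - 1);

	textsearch_destroy(conf);
	return pos == UINT_MAX ? -ENOENT : 0;
}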
+
+static int amanda_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int dataoff, start, stop, off, i;
+ char pbuf[sizeof("65535")], *tmp;
+ u_int16_t len;
+ __be16 port;
+ int ret = NF_ACCEPT;
+ typeof(nf_nat_amanda_hook) nf_nat_amanda;
+
+ /* Only look at packets from the Amanda server */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* increase the UDP timeout of the master connection as replies from
+ * Amanda clients to the server can be quite delayed */
+ nf_ct_refresh(ct, skb, master_timeout * HZ);
+
+ /* No data? */
+ dataoff = protoff + sizeof(struct udphdr);
+ if (dataoff >= skb->len) {
+ net_err_ratelimited("amanda_help: skblen = %u\n", skb->len);
+ return NF_ACCEPT;
+ }
+
+ start = skb_find_text(skb, dataoff, skb->len,
+ search[SEARCH_CONNECT].ts);
+ if (start == UINT_MAX)
+ goto out;
+ start += dataoff + search[SEARCH_CONNECT].len;
+
+ stop = skb_find_text(skb, start, skb->len,
+ search[SEARCH_NEWLINE].ts);
+ if (stop == UINT_MAX)
+ goto out;
+ stop += start;
+
+ for (i = SEARCH_DATA; i <= SEARCH_STATE; i++) {
+ off = skb_find_text(skb, start, stop, search[i].ts);
+ if (off == UINT_MAX)
+ continue;
+ off += start + search[i].len;
+
+ len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
+ if (skb_copy_bits(skb, off, pbuf, len))
+ break;
+ pbuf[len] = '\0';
+
+ port = htons(simple_strtoul(pbuf, &tmp, 10));
+ len = tmp - pbuf;
+ if (port == 0 || len > 5)
+ break;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ nf_ct_helper_log(skb, ct, "cannot alloc expectation");
+ ret = NF_DROP;
+ goto out;
+ }
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_TCP, NULL, &port);
+
+ nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
+ if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_amanda(skb, ctinfo, protoff,
+ off - dataoff, len, exp);
+ else if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct, "cannot add expectation");
+ ret = NF_DROP;
+ }
+ nf_ct_expect_put(exp);
+ }
+
+out:
+ return ret;
+}
+
+static const struct nf_conntrack_expect_policy amanda_exp_policy = {
+ .max_expected = 4,
+ .timeout = 180,
+};
+
+static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
+ {
+ .name = HELPER_NAME,
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .expect_policy = &amanda_exp_policy,
+ .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME),
+ },
+ {
+ .name = "amanda",
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.udp.port = cpu_to_be16(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .expect_policy = &amanda_exp_policy,
+ .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME),
+ },
+};
+
+static void __exit nf_conntrack_amanda_fini(void)
+{
+ int i;
+
+ nf_conntrack_helpers_unregister(amanda_helper,
+ ARRAY_SIZE(amanda_helper));
+ for (i = 0; i < ARRAY_SIZE(search); i++)
+ textsearch_destroy(search[i].ts);
+}
+
+static int __init nf_conntrack_amanda_init(void)
+{
+ int ret, i;
+
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
+ for (i = 0; i < ARRAY_SIZE(search); i++) {
+ search[i].ts = textsearch_prepare(ts_algo, search[i].string,
+ search[i].len,
+ GFP_KERNEL, TS_AUTOLOAD);
+ if (IS_ERR(search[i].ts)) {
+ ret = PTR_ERR(search[i].ts);
+ goto err1;
+ }
+ }
+ ret = nf_conntrack_helpers_register(amanda_helper,
+ ARRAY_SIZE(amanda_helper));
+ if (ret < 0)
+ goto err1;
+ return 0;
+
+err1:
+ while (--i >= 0)
+ textsearch_destroy(search[i].ts);
+
+ return ret;
+}
+
+module_init(nf_conntrack_amanda_init);
+module_exit(nf_conntrack_amanda_fini);
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644
index 000000000..816283f0a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable conntrack helpers for the XDP and TC-BPF hooks
+ *
+ * These are called from XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface
+ * through which they are exposed to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - opts->reserved is not 0
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @dir - Connection tracking tuple direction
+ * @reserved - Reserved member, will be reused for more options in future
+ * Values:
+ * 0
+ */
+struct bpf_ct_opts {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+};
+
+enum {
+ NF_BPF_CT_OPTS_SZ = 12,
+};
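
On the BPF side, a program fills in these options and checks @error on
failure. A hypothetical XDP lookup could look like the sketch below; the
__ksym externs mirror the kfuncs defined later in this file, while the
program body itself is illustrative and assumes the usual vmlinux.h /
bpf_helpers.h includes:

/* Illustrative BPF program fragment, not part of this file. */
extern struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  __u32 tuple__sz, struct bpf_ct_opts *opts,
		  __u32 opts__sz) __ksym;
extern void bpf_ct_release(struct nf_conn *nfct) __ksym;

SEC("xdp")
int demo_ct_lookup(struct xdp_md *ctx)
{
	struct bpf_ct_opts opts = {
		.netns_id = -1,			/* BPF_F_CURRENT_NETNS */
		.l4proto = IPPROTO_TCP,
	};
	struct bpf_sock_tuple tup = {};		/* fill from parsed headers */
	struct nf_conn *ct;

	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4),
			       &opts, sizeof(opts));
	if (!ct)
		return XDP_PASS;		/* opts.error has the cause */

	bpf_ct_release(ct);	/* every acquired ref must be released */
	return XDP_PASS;
}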
+
+static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum, u8 dir,
+ struct nf_conntrack_tuple *tuple)
+{
+ union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
+ union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
+ union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
+ : &tuple->src.u;
+ union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
+ : (void *)&tuple->dst.u;
+
+ if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+ return -EPROTO;
+
+ memset(tuple, 0, sizeof(*tuple));
+
+ switch (tuple_len) {
+ case sizeof(bpf_tuple->ipv4):
+ tuple->src.l3num = AF_INET;
+ src->ip = bpf_tuple->ipv4.saddr;
+ sport->tcp.port = bpf_tuple->ipv4.sport;
+ dst->ip = bpf_tuple->ipv4.daddr;
+ dport->tcp.port = bpf_tuple->ipv4.dport;
+ break;
+ case sizeof(bpf_tuple->ipv6):
+ tuple->src.l3num = AF_INET6;
+ memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ sport->tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ dport->tcp.port = bpf_tuple->ipv6.dport;
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ tuple->dst.protonum = protonum;
+ tuple->dst.dir = dir;
+
+ return 0;
+}
+
+static struct nf_conn *
+__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
+ u32 timeout)
+{
+ struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &otuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_REPLY, &rtuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+ GFP_ATOMIC);
+ if (IS_ERR(ct))
+ goto out;
+
+ memset(&ct->proto, 0, sizeof(ct->proto));
+ __nf_ct_set_timeout(ct, timeout * HZ);
+
+out:
+ if (opts->netns_id >= 0)
+ put_net(net);
+
+ return ct;
+}
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts,
+ u32 opts_len)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+ if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &tuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (opts->netns_id >= 0)
+ put_net(net);
+ if (!hash)
+ return ERR_PTR(-ENOENT);
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ opts->dir = NF_CT_DIRECTION(hash);
+
+ return ct;
+}
+
+BTF_ID_LIST(btf_nf_conn_ids)
+BTF_ID(struct, nf_conn)
+BTF_ID(struct, nf_conn___init)
+
+/* Check writes into `struct nf_conn` */
+static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ const struct btf_type *ncit;
+ const struct btf_type *nct;
+ size_t end;
+
+ ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
+
+ if (t != nct && t != ncit) {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ /* `struct nf_conn` and `struct nf_conn___init` have the same layout
+ * so we are safe to simply merge offset checks here
+ */
+ switch (off) {
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ case offsetof(struct nf_conn, mark):
+ end = offsetofend(struct nf_conn, mark);
+ break;
+#endif
+ default:
+ bpf_log(log, "no write support to nf_conn at off %d\n", off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct nf_conn *nfct;
+
+ nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
+ opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ caller_net = dev_net(ctx->rxq->dev);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_skb_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct nf_conn *nfct;
+ struct net *net;
+
+ net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_insert_entry - Add the provided entry into a CT map
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID.
+ *
+ * @nfct - Pointer to referenced nf_conn___init object, obtained
+ * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ */
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+{
+ struct nf_conn *nfct = (struct nf_conn *)nfct_i;
+ int err;
+
+ if (!nf_ct_is_confirmed(nfct))
+ nfct->timeout += nfct_time_stamp;
+ nfct->status |= IPS_CONFIRMED;
+ err = nf_conntrack_hash_check_insert(nfct);
+ if (err < 0) {
+ nf_conntrack_free(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID; the verifier rejects
+ * the program if any reference is still held in any of the explored
+ * states.
+ *
+ * Parameters:
+ * @nf_conn - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+ if (!nfct)
+ return;
+ nf_ct_put(nfct);
+}
+
+/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
+ *
+ * Sets the default timeout of a newly allocated nf_conn before insertion.
+ * This helper must be invoked for a refcounted pointer to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @timeout - Timeout in msecs.
+ */
+void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+{
+ __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
+ *
+ * Changes the timeout associated with the inserted or looked-up nf_conn.
+ * This helper must be invoked for a refcounted pointer to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
+ * @timeout - New timeout in msecs.
+ */
+int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+{
+ return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_set_status - Set status field of allocated nf_conn
+ *
+ * Set the status field of the newly allocated nf_conn before insertion.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @status - New status value.
+ */
+int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+{
+ return nf_ct_change_status_common((struct nf_conn *)nfct, status);
+}
+
+/* bpf_ct_change_status - Change status of inserted nf_conn
+ *
+ * Change the status field of the provided connection tracking entry.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ * @status - New status value.
+ */
+int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+{
+ return nf_ct_change_status_common(nfct, status);
+}
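
Putting the allocation-side kfuncs together, a hypothetical creation flow
in a BPF program is: allocate, parameterize, insert, release.
bpf_ct_insert_entry() consumes the nf_conn___init reference and returns an
ordinary nf_conn reference that must still be released. A sketch under
those assumptions, with the tuple/opts setup as in the lookup example above:

/* Illustrative fragment of a BPF program body, not part of this file. */
struct nf_conn___init *ct_i;
struct nf_conn *ct;

ct_i = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
if (!ct_i)
	return XDP_PASS;

bpf_ct_set_timeout(ct_i, 30000);	/* 30s; argument is in msecs */
ct = bpf_ct_insert_entry(ct_i);		/* consumes the ___init reference */
if (ct)
	bpf_ct_release(ct);	/* drop the inserted entry's reference */
return XDP_PASS;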
+
+__diag_pop()
+
+BTF_SET8_START(nf_ct_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_ct_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_ct_kfunc_set,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ if (!ret) {
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
+ mutex_unlock(&nf_conn_btf_access_lock);
+ }
+
+ return ret;
+}
+
+void cleanup_nf_conntrack_bpf(void)
+{
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = NULL;
+ mutex_unlock(&nf_conn_btf_access_lock);
+}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 000000000..9fb9b8031
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int timeout)
+{
+ const struct nf_conntrack_helper *helper;
+ struct nf_conntrack_expect *exp;
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+ struct in_device *in_dev;
+ struct nf_conn_help *help = nfct_help(ct);
+ __be32 mask = 0;
+
+ /* we're only interested in locally generated packets */
+ if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
+ goto out;
+ if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+ goto out;
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ goto out;
+
+ in_dev = __in_dev_get_rcu(rt->dst.dev);
+ if (in_dev != NULL) {
+ const struct in_ifaddr *ifa;
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
+ if (ifa->ifa_broadcast == iph->daddr) {
+ mask = ifa->ifa_mask;
+ break;
+ }
+ }
+ }
+
+ if (mask == 0)
+ goto out;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ goto out;
+
+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ helper = rcu_dereference(help->helper);
+ if (helper)
+ exp->tuple.src.u.udp.port = helper->tuple.src.u.udp.port;
+
+ exp->mask.src.u3.ip = mask;
+ exp->mask.src.u.udp.port = htons(0xFFFF);
+
+ exp->expectfn = NULL;
+ exp->flags = NF_CT_EXPECT_PERMANENT;
+ exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
+ exp->helper = NULL;
+
+ nf_ct_expect_related(exp, 0);
+ nf_ct_expect_put(exp);
+
+ nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
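
A consumer wires nf_conntrack_broadcast_help() into a conntrack helper's
.help callback. The sketch below is modeled on how a broadcast-based helper
(in the spirit of netbios-ns) typically does this; the name, port, policy
values, and demo_ identifiers are illustrative assumptions:

/* Hypothetical broadcast-protocol helper built on the function above. */
static unsigned int demo_timeout __read_mostly = 3;

static int demo_broadcast_help(struct sk_buff *skb, unsigned int protoff,
			       struct nf_conn *ct,
			       enum ip_conntrack_info ctinfo)
{
	return nf_conntrack_broadcast_help(skb, ct, ctinfo, demo_timeout);
}

static const struct nf_conntrack_expect_policy demo_exp_policy = {
	.max_expected		= 1,
	.timeout		= 3,
};

static struct nf_conntrack_helper demo_helper __read_mostly = {
	.name			= "demo-bcast",
	.me			= THIS_MODULE,
	.help			= demo_broadcast_help,
	.tuple.src.l3num	= AF_INET,
	.tuple.src.u.udp.port	= cpu_to_be16(137),	/* illustrative port */
	.tuple.dst.protonum	= IPPROTO_UDP,
	.expect_policy		= &demo_exp_policy,
};

/* registered/unregistered via nf_conntrack_helper_register()/_unregister() */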
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
new file mode 100644
index 000000000..796026296
--- /dev/null
+++ b/net/netfilter/nf_conntrack_core.c
@@ -0,0 +1,2875 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Connection state tracking for netfilter. This is separated from,
+ but required by, the NAT layer; it can also be used by an iptables
+ extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/siphash.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/mm.h>
+#include <linux/nsproxy.h>
+#include <linux/rculist_nulls.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netns/hash.h>
+#include <net/ip.h>
+
+#include "nf_internals.h"
+
+__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+EXPORT_SYMBOL_GPL(nf_conntrack_locks);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 next_bucket;
+ u32 avg_timeout;
+ u32 count;
+ u32 start_time;
+ bool exiting;
+ bool early_drop;
+};
+
+static __read_mostly struct kmem_cache *nf_conntrack_cachep;
+static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
+static __read_mostly bool nf_conntrack_locks_all;
+
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
+#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* Initial bias pretending we have 100 entries at the upper bound so we don't
+ * wake up often just because we have three entries with a 1s timeout, while
+ * still allowing non-idle machines to wake up more often when needed.
+ */
+#define GC_SCAN_INITIAL_COUNT 100
+#define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX
+
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
+
+#define MIN_CHAINLEN 50u
+#define MAX_CHAINLEN (80u - MIN_CHAINLEN)
+
+static struct conntrack_gc_work conntrack_gc_work;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+ /* 1) Acquire the lock */
+ spin_lock(lock);
+
+ /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+ * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+ */
+ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+ return;
+
+ /* fast path failed, unlock */
+ spin_unlock(lock);
+
+ /* Slow path 1) get global lock */
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+ /* Slow path 2) get the lock we want */
+ spin_lock(lock);
+
+ /* Slow path 3) release the global lock */
+ spin_unlock(&nf_conntrack_locks_all_lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
+static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ spin_unlock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_unlock(&nf_conntrack_locks[h2]);
+}
+
+/* return true if we need to recompute hashes (in case hash table was resized) */
+static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
+ unsigned int h2, unsigned int sequence)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ if (h1 <= h2) {
+ nf_conntrack_lock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_lock_nested(&nf_conntrack_locks[h2],
+ SINGLE_DEPTH_NESTING);
+ } else {
+ nf_conntrack_lock(&nf_conntrack_locks[h2]);
+ spin_lock_nested(&nf_conntrack_locks[h1],
+ SINGLE_DEPTH_NESTING);
+ }
+ if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
+ nf_conntrack_double_unlock(h1, h2);
+ return true;
+ }
+ return false;
+}
+
+static void nf_conntrack_all_lock(void)
+ __acquires(&nf_conntrack_locks_all_lock)
+{
+ int i;
+
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+	/* For nf_conntrack_locks_all, only the latest time when another
+	 * CPU will see an update is controlled by the "release" of the
+	 * spin_lock below.
+	 * The earliest time is not controlled, and thus KCSAN could detect
+	 * a race when nf_conntrack_lock() reads the variable.
+	 * WRITE_ONCE() is used to ensure the compiler will not
+	 * optimize the write.
+	 */
+ WRITE_ONCE(nf_conntrack_locks_all, true);
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++) {
+ spin_lock(&nf_conntrack_locks[i]);
+
+ /* This spin_unlock provides the "release" to ensure that
+ * nf_conntrack_locks_all==true is visible to everyone that
+ * acquired spin_lock(&nf_conntrack_locks[]).
+ */
+ spin_unlock(&nf_conntrack_locks[i]);
+ }
+}
+
+static void nf_conntrack_all_unlock(void)
+ __releases(&nf_conntrack_locks_all_lock)
+{
+ /* All prior stores must be complete before we clear
+ * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
+ * might observe the false value but not the entire
+ * critical section.
+ * It pairs with the smp_load_acquire() in nf_conntrack_lock()
+ */
+ smp_store_release(&nf_conntrack_locks_all, false);
+ spin_unlock(&nf_conntrack_locks_all_lock);
+}
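
The two paths pair up as in this schematic (illustrative pseudo-usage, not
code from this file): the per-packet path takes only its bucket lock via
nf_conntrack_lock(), while slow paths such as a hash resize bracket their
work with the global pair:

/* Fast path: one bucket lock, checked against the global flag. */
nf_conntrack_lock(&nf_conntrack_locks[hash % CONNTRACK_LOCKS]);
/* ... add or remove a single entry ... */
spin_unlock(&nf_conntrack_locks[hash % CONNTRACK_LOCKS]);

/* Slow path (e.g. resize): exclude all bucket lockers at once. */
nf_conntrack_all_lock();
/* ... swap in the new hash table ... */
nf_conntrack_all_unlock();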
+
+unsigned int nf_conntrack_htable_size __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
+
+unsigned int nf_conntrack_max __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_spinlock_t nf_conntrack_generation __read_mostly;
+static siphash_aligned_key_t nf_conntrack_hash_rnd;
+
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
+ const struct net *net)
+{
+ struct {
+ struct nf_conntrack_man src;
+ union nf_inet_addr dst_addr;
+ unsigned int zone;
+ u32 net_mix;
+ u16 dport;
+ u16 proto;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
+
+ get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
+
+ memset(&combined, 0, sizeof(combined));
+
+ /* The direction must be ignored, so handle usable members manually. */
+ combined.src = tuple->src;
+ combined.dst_addr = tuple->dst.u3;
+ combined.zone = zoneid;
+ combined.net_mix = net_hash_mix(net);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.proto = tuple->dst.protonum;
+
+ return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
+}
+
+static u32 scale_hash(u32 hash)
+{
+ return reciprocal_scale(hash, nf_conntrack_htable_size);
+}
+
+static u32 __hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
+ unsigned int size)
+{
+ return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
+}
+
+static u32 hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid)
+{
+ return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
+}
+
+static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+	struct {
+		__be16 sport;
+		__be16 dport;
+	} _inet_hdr, *inet_hdr;
+
+ /* Actually only need first 4 bytes to get ports. */
+ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+ if (!inet_hdr)
+ return false;
+
+ tuple->src.u.udp.port = inet_hdr->sport;
+ tuple->dst.u.udp.port = inet_hdr->dport;
+ return true;
+}
+
+static bool
+nf_ct_get_tuple(const struct sk_buff *skb,
+ unsigned int nhoff,
+ unsigned int dataoff,
+ u_int16_t l3num,
+ u_int8_t protonum,
+ struct net *net,
+ struct nf_conntrack_tuple *tuple)
+{
+ unsigned int size;
+ const __be32 *ap;
+ __be32 _addrs[8];
+
+ memset(tuple, 0, sizeof(*tuple));
+
+ tuple->src.l3num = l3num;
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ nhoff += offsetof(struct iphdr, saddr);
+ size = 2 * sizeof(__be32);
+ break;
+ case NFPROTO_IPV6:
+ nhoff += offsetof(struct ipv6hdr, saddr);
+ size = sizeof(_addrs);
+ break;
+ default:
+ return true;
+ }
+
+ ap = skb_header_pointer(skb, nhoff, size, _addrs);
+ if (!ap)
+ return false;
+
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ tuple->src.u3.ip = ap[0];
+ tuple->dst.u3.ip = ap[1];
+ break;
+ case NFPROTO_IPV6:
+ memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+ memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+ break;
+ }
+
+ tuple->dst.protonum = protonum;
+ tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+ switch (protonum) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+ case IPPROTO_ICMP:
+ return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ return gre_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE:
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP:
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP:
+#endif
+ /* fallthrough */
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u_int8_t *protonum)
+{
+ int dataoff = -1;
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return -1;
+
+	/* Conntrack defragments packets; we might still see fragments
+	 * inside ICMP packets, though.
+	 */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -1;
+
+ dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (dataoff > skb->len) {
+ pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -1;
+ }
+ return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u8 *protonum)
+{
+ int protoff = -1;
+ unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+ __be16 frag_off;
+ u8 nexthdr;
+
+ if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+ &nexthdr, sizeof(nexthdr)) != 0) {
+ pr_debug("can't get nexthdr\n");
+ return -1;
+ }
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+	/*
+	 * (protoff == skb->len) means the packet has no data, just the
+	 * IPv6 header and possibly extension headers, but it is tracked anyway
+	 */
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("can't find proto in pkt\n");
+ return -1;
+ }
+
+ *protonum = nexthdr;
+ return protoff;
+}
+#endif
+
+static int get_l4proto(const struct sk_buff *skb,
+ unsigned int nhoff, u8 pf, u8 *l4num)
+{
+ switch (pf) {
+ case NFPROTO_IPV4:
+ return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+ default:
+ *l4num = 0;
+ break;
+ }
+ return -1;
+}
+
+bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
+ u_int16_t l3num,
+ struct net *net, struct nf_conntrack_tuple *tuple)
+{
+ u8 protonum;
+ int protoff;
+
+ protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+ if (protoff <= 0)
+ return false;
+
+ return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple);
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
+
+bool
+nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
+ const struct nf_conntrack_tuple *orig)
+{
+ memset(inverse, 0, sizeof(*inverse));
+
+ inverse->src.l3num = orig->src.l3num;
+
+ switch (orig->src.l3num) {
+ case NFPROTO_IPV4:
+ inverse->src.u3.ip = orig->dst.u3.ip;
+ inverse->dst.u3.ip = orig->src.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ inverse->src.u3.in6 = orig->dst.u3.in6;
+ inverse->dst.u3.in6 = orig->src.u3.in6;
+ break;
+ default:
+ break;
+ }
+
+ inverse->dst.dir = !orig->dst.dir;
+
+ inverse->dst.protonum = orig->dst.protonum;
+
+ switch (orig->dst.protonum) {
+ case IPPROTO_ICMP:
+ return nf_conntrack_invert_icmp_tuple(inverse, orig);
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
+#endif
+ }
+
+ inverse->src.u.all = orig->dst.u.all;
+ inverse->dst.u.all = orig->src.u.all;
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * This intentionally doesn't reuse any of the seeds used for hash
+ * table location, as we assume the id gets exposed to userspace.
+ *
+ * The following nf_conn items do not change throughout the lifetime
+ * of the nf_conn:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->master address (normally NULL)
+ * 3. the associated net namespace
+ * 4. the original direction tuple
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+ static siphash_aligned_key_t ct_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+ a = (unsigned long)ct;
+ b = (unsigned long)ct->master;
+ c = (unsigned long)nf_ct_net(ct);
+ d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
+ &ct_id_seed);
+#ifdef CONFIG_64BIT
+ return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+ return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
+static void
+clean_from_lists(struct nf_conn *ct)
+{
+ pr_debug("clean_from_lists(%p)\n", ct);
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
+
+ /* Destroy all pending expectations */
+ nf_ct_remove_expectations(ct);
+}
+
+#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
+
+/* Released via nf_ct_destroy() */
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ gfp_t flags)
+{
+ struct nf_conn *tmpl, *p;
+
+ if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) {
+ tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags);
+ if (!tmpl)
+ return NULL;
+
+ p = tmpl;
+ tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
+ if (tmpl != p) {
+ tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
+ tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p;
+ }
+ } else {
+ tmpl = kzalloc(sizeof(*tmpl), flags);
+ if (!tmpl)
+ return NULL;
+ }
+
+ tmpl->status = IPS_TEMPLATE;
+ write_pnet(&tmpl->ct_net, net);
+ nf_ct_zone_add(tmpl, zone);
+ refcount_set(&tmpl->ct_general.use, 1);
+
+ return tmpl;
+}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
+
+void nf_ct_tmpl_free(struct nf_conn *tmpl)
+{
+ kfree(tmpl->ext);
+
+ if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
+ kfree((char *)tmpl - tmpl->proto.tmpl_padto);
+ else
+ kfree(tmpl);
+}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
+
+static void destroy_gre_conntrack(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ struct nf_conn *master = ct->master;
+
+ if (master)
+ nf_ct_gre_keymap_destroy(master);
+#endif
+}
+
+void nf_ct_destroy(struct nf_conntrack *nfct)
+{
+ struct nf_conn *ct = (struct nf_conn *)nfct;
+
+ pr_debug("%s(%p)\n", __func__, ct);
+ WARN_ON(refcount_read(&nfct->use) != 0);
+
+ if (unlikely(nf_ct_is_template(ct))) {
+ nf_ct_tmpl_free(ct);
+ return;
+ }
+
+ if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
+ destroy_gre_conntrack(ct);
+
+	/* Expectations will have been removed in clean_from_lists,
+	 * except that TFTP can create an expectation on the first packet,
+	 * before the connection is in the list, so we need to clean here,
+	 * too.
+	 */
+ nf_ct_remove_expectations(ct);
+
+ if (ct->master)
+ nf_ct_put(ct->master);
+
+ pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
+ nf_conntrack_free(ct);
+}
+EXPORT_SYMBOL(nf_ct_destroy);
+
+static void __nf_ct_delete_from_lists(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int hash, reply_hash;
+ unsigned int sequence;
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = hash_conntrack(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
+ reply_hash = hash_conntrack(net,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
+ clean_from_lists(ct);
+ nf_conntrack_double_unlock(hash, reply_hash);
+}
+
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
+{
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
+
+ __nf_ct_delete_from_lists(ct);
+
+ local_bh_enable();
+}
+
+static void nf_ct_add_to_ecache_list(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct));
+
+ spin_lock(&cnet->ecache.dying_lock);
+ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &cnet->ecache.dying_list);
+ spin_unlock(&cnet->ecache.dying_lock);
+#endif
+}
+
+bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
+{
+ struct nf_conn_tstamp *tstamp;
+ struct net *net;
+
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+
+ tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+ tstamp->stop -= jiffies_to_nsecs(-timeout);
+ }
+
+ if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0) {
+ /* destroy event was not delivered. nf_ct_put will
+ * be done by event cache worker on redelivery.
+ */
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
+ __nf_ct_delete_from_lists(ct);
+ nf_ct_add_to_ecache_list(ct);
+ local_bh_enable();
+
+ nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL);
+ return false;
+ }
+
+ net = nf_ct_net(ct);
+ if (nf_conntrack_ecache_dwork_pending(net))
+ nf_conntrack_ecache_work(net, NFCT_ECACHE_DESTROY_SENT);
+ nf_ct_delete_from_lists(ct);
+ nf_ct_put(ct);
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete);
+
+static inline bool
+nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
+{
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ /* A conntrack can be recreated with an equal tuple,
+ * so we need to check that the conntrack is confirmed
+ */
+ return nf_ct_tuple_equal(tuple, &h->tuple) &&
+ nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
+ nf_ct_is_confirmed(ct) &&
+ net_eq(net, nf_ct_net(ct));
+}
+
+static inline bool
+nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+ return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
+ net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
+}
+
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
+{
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return;
+
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
+
+ nf_ct_put(ct);
+}
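+
+/* A minimal sketch of the lookup pattern used by nf_ct_gc_expired()
+ * above and __nf_conntrack_find_get() below for this
+ * SLAB_TYPESAFE_BY_RCU cache (illustrative; the names are generic):
+ *
+ *	rcu_read_lock();
+ *	obj = lookup(key);
+ *	if (obj && refcount_inc_not_zero(&obj->ref)) {
+ *		smp_acquire__after_ctrl_dep();
+ *		if (!match(obj, key))
+ *			put(obj);	// object was recycled, retry
+ *	}
+ *	rcu_read_unlock();
+ *
+ * Without the acquire barrier, loads of ->status and friends could be
+ * reordered before the refcount check and observe a recycled object.
+ */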
+
+/*
+ * Warning:
+ * - Caller must take a reference on returned object
+ * and recheck nf_ct_tuple_equal(tuple, &h->tuple)
+ */
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple, u32 hash)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int bucket, hsize;
+
+begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net))
+ return h;
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart the lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(n) != bucket) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
+ return NULL;
+}
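+
+/* Worked example of the restart check above: nf_ct_alloc_hashtable()
+ * seeds each chain's nulls terminator with its own bucket index. If a
+ * walk of bucket 5 races with an entry being freed and recycled into
+ * bucket 9, the walker can slide onto bucket 9's chain and end on a
+ * terminator carrying 9 != 5, which triggers the "goto begin" retry
+ * instead of returning a false negative.
+ */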
+
+/* Find a connection corresponding to a tuple. */
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple, u32 hash)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+
+ rcu_read_lock();
+
+ h = ____nf_conntrack_find(net, zone, tuple, hash);
+ if (h) {
+ /* We have a candidate that matches the tuple we're interested
+ * in, try to obtain a reference and re-check tuple
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
+ /* re-check key after refcount */
+ smp_acquire__after_ctrl_dep();
+
+ if (likely(nf_ct_key_equal(h, tuple, zone, net)))
+ goto found;
+
+ /* TYPESAFE_BY_RCU recycled the candidate */
+ nf_ct_put(ct);
+ }
+
+ h = NULL;
+ }
+found:
+ rcu_read_unlock();
+
+ return h;
+}
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ struct nf_conntrack_tuple_hash *thash;
+
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone_id, net));
+
+ if (thash)
+ return thash;
+
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (rid != zone_id)
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+ return thash;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
+
+static void __nf_conntrack_hash_insert(struct nf_conn *ct,
+ unsigned int hash,
+ unsigned int reply_hash)
+{
+ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &nf_conntrack_hash[hash]);
+ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[reply_hash]);
+}
+
+static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext)
+{
+ /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions
+ * may contain stale pointers to e.g. a helper that has been removed.
+ *
+ * The helper can't clear this because the nf_conn object isn't in
+ * any hash and synchronize_rcu() isn't enough because associated skb
+ * might sit in a queue.
+ */
+ return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid);
+}
+
+static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext)
+{
+ if (!ext)
+ return true;
+
+ if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid))
+ return false;
+
+ /* inserted into conntrack table, nf_ct_iterate_cleanup()
+ * will find it. Disable nf_ct_ext_find() id check.
+ */
+ WRITE_ONCE(ext->gen_id, 0);
+ return true;
+}
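+
+/* Rough timeline of the gen_id handshake (illustrative numbers):
+ *
+ *	1. ct is allocated while nf_conntrack_ext_genid == 4, so
+ *	   ct->ext->gen_id == 4.
+ *	2. a helper module is removed; nf_ct_ext_bump_genid() makes
+ *	   the global genid 5.
+ *	3. nf_ct_ext_valid_pre()/_post() see 4 != 5 and fail the
+ *	   insert with -EAGAIN, so a stale helper pointer never
+ *	   becomes reachable from the hash table.
+ *	4. on a successful insert, _post() clears gen_id to 0 so
+ *	   nf_ct_ext_find() skips the check for confirmed entries.
+ */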
+
+int
+nf_conntrack_hash_check_insert(struct nf_conn *ct)
+{
+ const struct nf_conntrack_zone *zone;
+ struct net *net = nf_ct_net(ct);
+ unsigned int hash, reply_hash;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ unsigned int max_chainlen;
+ unsigned int chainlen = 0;
+ unsigned int sequence;
+ int err = -EEXIST;
+
+ zone = nf_ct_zone(ct);
+
+ if (!nf_ct_ext_valid_pre(ct->ext))
+ return -EAGAIN;
+
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = hash_conntrack(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
+ reply_hash = hash_conntrack(net,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
+ /* See if there's one in the list already, including reverse */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
+ goto out;
+
+ if (chainlen++ > max_chainlen)
+ goto chaintoolong;
+ }
+
+ chainlen = 0;
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ goto out;
+ if (chainlen++ > max_chainlen)
+ goto chaintoolong;
+ }
+
+ /* If genid has changed, we can't insert anymore because ct
+ * extensions could have stale pointers and nf_ct_iterate_destroy
+ * might have completed its table scan already.
+ *
+ * Increment of the ext genid right after this check is fine:
+ * nf_ct_iterate_destroy blocks until locks are released.
+ */
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ smp_wmb();
+ /* The caller holds a reference to this object */
+ refcount_set(&ct->ct_general.use, 2);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ NF_CT_STAT_INC(net, insert);
+ local_bh_enable();
+
+ return 0;
+chaintoolong:
+ NF_CT_STAT_INC(net, chaintoolong);
+ err = -ENOSPC;
+out:
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
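+
+/* The randomized chain bound in the function above makes the overflow
+ * threshold unpredictable. Assuming the mainline constants
+ * MIN_CHAINLEN == 8 and MAX_CHAINLEN == 24, each insertion tolerates
+ * between 8 and 31 chain entries before failing with -ENOSPC; a fixed
+ * bound would let an attacker who can force hash collisions sit
+ * exactly below the limit.
+ */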
+
+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
+ unsigned int bytes)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+
+ atomic64_add(packets, &counter[dir].packets);
+ atomic64_add(bytes, &counter[dir].bytes);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
+
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ const struct nf_conn *loser_ct)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(loser_ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+ unsigned int bytes;
+
+ /* u32 should be fine since we must have seen one packet. */
+ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
+ }
+}
+
+static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ refcount_inc(&ct->ct_general.use);
+
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+}
+
+/* caller must hold locks to prevent concurrent changes */
+static int __nf_ct_resolve_clash(struct sk_buff *skb,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+
+ if (nf_ct_is_dying(ct))
+ return NF_DROP;
+
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ struct net *net = nf_ct_net(ct);
+
+ nf_conntrack_get(&ct->ct_general);
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_put(loser_ct);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, clash_resolve);
+ return NF_ACCEPT;
+ }
+
+ return NF_DROP;
+}
+
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or the existing entry has the identical tuple in both
+ * directions.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+ struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct net *net;
+
+ zone = nf_ct_zone(loser_ct);
+ net = nf_ct_net(loser_ct);
+
+ /* Reply direction must never result in a clash, unless both origin
+ * and reply tuples are identical.
+ */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+ if (nf_ct_key_equal(h,
+ &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ return __nf_ct_resolve_clash(skb, h);
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+ WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+ * ASSURED state, i.e. the entry can always be evicted under
+ * pressure.
+ */
+ loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+ __nf_conntrack_insert_prepare(loser_ct);
+
+ /* fake add for ORIGINAL dir: we want lookups to only find the entry
+ * already in the table. This also hides the clashing entry from
+ * ctnetlink iteration, i.e. conntrack -L won't show it.
+ */
+ hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+ hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+ &nf_conntrack_hash[repl_idx]);
+
+ NF_CT_STAT_INC(net, clash_resolve);
+ return NF_ACCEPT;
+}
+
+/**
+ * nf_ct_resolve_clash - attempt to handle clash without packet drop
+ *
+ * @skb: skb that causes the clash
+ * @h: tuplehash of the clashing entry already in table
+ * @reply_hash: hash slot for reply direction
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple.
+ *
+ * If there is one, @skb (and the associated, unconfirmed conntrack) has
+ * to be dropped. In case @skb is retransmitted, next conntrack lookup
+ * will find the already-existing entry.
+ *
+ * The major problem with such packet drop is the extra delay added by
+ * the packet loss -- it will take some time for a retransmit to occur
+ * (or the sender to time out when waiting for a reply).
+ *
+ * This function attempts to handle the situation without packet drop.
+ *
+ * If @skb has no NAT transformation or if the colliding entries are
+ * exactly the same, only the to-be-confirmed conntrack entry is discarded
+ * and @skb is associated with the conntrack entry already in the table.
+ *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added only in the non-clashing REPLY direction,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry. The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will only see reply traffic.
+ *
+ * Returns NF_DROP if the clash could not be resolved.
+ */
+static __cold noinline int
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+ u32 reply_hash)
+{
+ /* This is the conntrack entry already in hashes that won race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ const struct nf_conntrack_l4proto *l4proto;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *loser_ct;
+ struct net *net;
+ int ret;
+
+ loser_ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(loser_ct);
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ if (!l4proto->allow_clash)
+ goto drop;
+
+ ret = __nf_ct_resolve_clash(skb, h);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+ ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+ if (ret == NF_ACCEPT)
+ return ret;
+
+drop:
+ NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC(net, insert_failed);
+ return NF_DROP;
+}
+
+/* Confirm a connection given skb; places it in hash table */
+int
+__nf_conntrack_confirm(struct sk_buff *skb)
+{
+ unsigned int chainlen = 0, sequence, max_chainlen;
+ const struct nf_conntrack_zone *zone;
+ unsigned int hash, reply_hash;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct nf_conn_help *help;
+ struct hlist_nulls_node *n;
+ enum ip_conntrack_info ctinfo;
+ struct net *net;
+ int ret = NF_DROP;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(ct);
+
+ /* ipt_REJECT uses nf_conntrack_attach to attach related
+ ICMP/TCP RST packets in the other direction. The actual
+ packet which created the connection will be IP_CT_NEW, or
+ IP_CT_RELATED for an expected connection. */
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ zone = nf_ct_zone(ct);
+ local_bh_disable();
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ /* reuse the hash saved before */
+ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+ hash = scale_hash(hash);
+ reply_hash = hash_conntrack(net,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
+ /* We're not in hash table, and we refuse to set up related
+ * connections for unconfirmed conns. But packet copies and
+ * REJECT will give spurious warnings here.
+ */
+
+ /* Another skb with the same unconfirmed conntrack may
+ * win the race. This may happen with bridge (br_flood)
+ * or broadcast/multicast packets, which are skb_clone()d
+ * while carrying an unconfirmed conntrack.
+ */
+ if (unlikely(nf_ct_is_confirmed(ct))) {
+ WARN_ON_ONCE(1);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return NF_DROP;
+ }
+
+ if (!nf_ct_ext_valid_pre(ct->ext)) {
+ NF_CT_STAT_INC(net, insert_failed);
+ goto dying;
+ }
+
+ pr_debug("Confirming conntrack %p\n", ct);
+ /* We have to check the DYING flag after unlink to prevent
+ * a race against nf_ct_get_next_corpse() possibly called from
+ * user context, else we insert an already 'dead' hash, blocking
+ * further use of that particular connection -JM.
+ */
+ ct->status |= IPS_CONFIRMED;
+
+ if (unlikely(nf_ct_is_dying(ct))) {
+ NF_CT_STAT_INC(net, insert_failed);
+ goto dying;
+ }
+
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost the race. */
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
+ goto out;
+ if (chainlen++ > max_chainlen)
+ goto chaintoolong;
+ }
+
+ chainlen = 0;
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
+ goto out;
+ if (chainlen++ > max_chainlen) {
+chaintoolong:
+ NF_CT_STAT_INC(net, chaintoolong);
+ NF_CT_STAT_INC(net, insert_failed);
+ ret = NF_DROP;
+ goto dying;
+ }
+ }
+
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. */
+ ct->timeout += nfct_time_stamp;
+
+ __nf_conntrack_insert_prepare(ct);
+
+ /* Since the lookup is lockless, hash insertion must be done after
+ * starting the timer and setting the CONFIRMED bit. The RCU barriers
+ * guarantee that no other CPU can find the conntrack before the above
+ * stores are visible.
+ */
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+
+ /* ext area is still valid (rcu read lock is held, but
+ * will go out of scope soon); we need to remove this
+ * conntrack again.
+ */
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ nf_ct_kill(ct);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
+ return NF_DROP;
+ }
+
+ help = nfct_help(ct);
+ if (help && help->helper)
+ nf_conntrack_event_cache(IPCT_HELPER, ct);
+
+ nf_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, ct);
+ return NF_ACCEPT;
+
+out:
+ ret = nf_ct_resolve_clash(skb, h, reply_hash);
+dying:
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
+
+/* Returns true if a connection corresponds to the tuple (required
+ for NAT). */
+int
+nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_conntrack)
+{
+ struct net *net = nf_ct_net(ignored_conntrack);
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ unsigned int hash, hsize;
+ struct hlist_nulls_node *n;
+ struct nf_conn *ct;
+
+ zone = nf_ct_zone(ignored_conntrack);
+
+ rcu_read_lock();
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
+ /* Tuple is taken already, so caller will need to find
+ * a new source port to use.
+ *
+ * Only exception:
+ * If the *original tuples* are identical, then both
+ * conntracks refer to the same flow.
+ * This is a rare situation, it can occur e.g. when
+ * more than one UDP packet is sent from same socket
+ * in different threads.
+ *
+ * Let nf_ct_resolve_clash() deal with this later.
+ */
+ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL))
+ continue;
+
+ NF_CT_STAT_INC_ATOMIC(net, found);
+ rcu_read_unlock();
+ return 1;
+ }
+ }
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
+ rcu_read_unlock();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
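+
+/* Typical caller pattern (illustrative sketch only, not a real kernel
+ * function): the NAT source-port allocator roughly probes candidate
+ * tuples until one is free:
+ *
+ *	for (port = low; port <= high; port++) {
+ *		tuple.src.u.all = htons(port);
+ *		if (!nf_conntrack_tuple_taken(&tuple, ct))
+ *			return port;	// unused, safe to pick
+ *	}
+ *	// all candidates taken: rely on clash resolution or drop
+ */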
+
+#define NF_CT_EVICTION_RANGE 8
+
+/* There's a small race here where we may free a just-assured
+ connection. Too bad: we're in trouble anyway. */
+static unsigned int early_drop_list(struct net *net,
+ struct hlist_nulls_head *head)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ unsigned int drops = 0;
+ struct nf_conn *tmp;
+
+ hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ continue;
+
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
+
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* load ->ct_net and ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
+ /* kill only if still in same netns -- might have moved due to
+ * SLAB_TYPESAFE_BY_RCU rules.
+ *
+ * We steal the timer reference. If that fails, the timer has
+ * already fired or someone else deleted it. Just drop the ref
+ * and move to the next entry.
+ */
+ if (net_eq(nf_ct_net(tmp), net) &&
+ nf_ct_is_confirmed(tmp) &&
+ nf_ct_delete(tmp, 0, 0))
+ drops++;
+
+ nf_ct_put(tmp);
+ }
+
+ return drops;
+}
+
+static noinline int early_drop(struct net *net, unsigned int hash)
+{
+ unsigned int i, bucket;
+
+ for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+ struct hlist_nulls_head *ct_hash;
+ unsigned int hsize, drops;
+
+ rcu_read_lock();
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ if (!i)
+ bucket = reciprocal_scale(hash, hsize);
+ else
+ bucket = (bucket + 1) % hsize;
+
+ drops = early_drop_list(net, &ct_hash[bucket]);
+ rcu_read_unlock();
+
+ if (drops) {
+ NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool gc_worker_skip_ct(const struct nf_conn *ct)
+{
+ return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
+}
+
+static bool gc_worker_can_early_drop(const struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ return true;
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
+ return true;
+
+ return false;
+}
+
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ u32 end_time, start_time = nfct_time_stamp;
+ struct conntrack_gc_work *gc_work;
+ unsigned int expired_count = 0;
+ unsigned long next_run;
+ s32 delta_time;
+ long count;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ i = gc_work->next_bucket;
+ if (gc_work->early_drop)
+ nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+
+ if (i == 0) {
+ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->count = GC_SCAN_INITIAL_COUNT;
+ gc_work->start_time = start_time;
+ }
+
+ next_run = gc_work->avg_timeout;
+ count = gc_work->count;
+
+ end_time = start_time + GC_SCAN_MAX_DURATION;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ struct nf_conn *tmp;
+
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ struct nf_conntrack_net *cnet;
+ struct net *net;
+ long expires;
+
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
+ continue;
+ }
+
+ if (expired_count > GC_SCAN_EXPIRED_MAX) {
+ rcu_read_unlock();
+
+ gc_work->next_bucket = i;
+ gc_work->avg_timeout = next_run;
+ gc_work->count = count;
+
+ delta_time = nfct_time_stamp - gc_work->start_time;
+
+ /* re-sched immediately if total cycle time is exceeded */
+ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+ goto early_exit;
+ }
+
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+
+ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ expires = (expires - (long)next_run) / ++count;
+ next_run += expires;
+
+ if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+ continue;
+
+ net = nf_ct_net(tmp);
+ cnet = nf_ct_pernet(net);
+ if (atomic_read(&cnet->count) < nf_conntrack_max95)
+ continue;
+
+ /* need to take reference to avoid possible races */
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
+ if (gc_worker_skip_ct(tmp)) {
+ nf_ct_put(tmp);
+ continue;
+ }
+
+ if (gc_worker_can_early_drop(tmp)) {
+ nf_ct_kill(tmp);
+ expired_count++;
+ }
+
+ nf_ct_put(tmp);
+ }
+
+ /* could check get_nulls_value() here and restart if ct
+ * was moved to another chain. But given that gc is best-effort,
+ * we will just continue with the next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched();
+ i++;
+
+ delta_time = nfct_time_stamp - end_time;
+ if (delta_time > 0 && i < hashsz) {
+ gc_work->avg_timeout = next_run;
+ gc_work->count = count;
+ gc_work->next_bucket = i;
+ next_run = 0;
+ goto early_exit;
+ }
+ } while (i < hashsz);
+
+ gc_work->next_bucket = 0;
+
+ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+ if (next_run > (unsigned long)delta_time)
+ next_run -= delta_time;
+ else
+ next_run = 1;
+
+early_exit:
+ if (gc_work->exiting)
+ return;
+
+ if (next_run)
+ gc_work->early_drop = false;
+
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
+}
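+
+/* The avg_timeout bookkeeping in the scan loop above is an incremental
+ * mean: next_run += (expiry - next_run) / ++count. Worked example
+ * (in seconds for readability; the code uses jiffies): with
+ * next_run = 60 and count = 1, an entry expiring in 10 gives
+ * 60 + (10 - 60) / 2 = 35, so short-lived entries pull the next scan
+ * closer while long timeouts push it out, bounded afterwards by
+ * GC_SCAN_INTERVAL_MIN/MAX.
+ */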
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp, u32 hash)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ unsigned int ct_count;
+ struct nf_conn *ct;
+
+ /* We don't want any race condition at early drop stage */
+ ct_count = atomic_inc_return(&cnet->count);
+
+ if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
+ if (!early_drop(net, hash)) {
+ if (!conntrack_gc_work.early_drop)
+ conntrack_gc_work.early_drop = true;
+ atomic_dec(&cnet->count);
+ net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ /*
+ * Do not use kmem_cache_zalloc(), as this cache uses
+ * SLAB_TYPESAFE_BY_RCU.
+ */
+ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
+ if (ct == NULL)
+ goto out;
+
+ spin_lock_init(&ct->lock);
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
+ /* save hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
+ ct->status = 0;
+ WRITE_ONCE(ct->timeout, 0);
+ write_pnet(&ct->ct_net, net);
+ memset_after(ct, 0, __nfct_init_offset);
+
+ nf_ct_zone_add(ct, zone);
+
+ /* Because we use RCU lookups, we set ct_general.use to zero before
+ * this is inserted in any list.
+ */
+ refcount_set(&ct->ct_general.use, 0);
+ return ct;
+out:
+ atomic_dec(&cnet->count);
+ return ERR_PTR(-ENOMEM);
+}
+
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp)
+{
+ return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
+
+void nf_conntrack_free(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_net *cnet;
+
+ /* A freed object has refcnt == 0, that's
+ * the golden rule for SLAB_TYPESAFE_BY_RCU
+ */
+ WARN_ON(refcount_read(&ct->ct_general.use) != 0);
+
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ const struct nf_nat_hook *nat_hook;
+
+ rcu_read_lock();
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (nat_hook)
+ nat_hook->remove_nat_bysrc(ct);
+ rcu_read_unlock();
+ }
+
+ kfree(ct->ext);
+ kmem_cache_free(nf_conntrack_cachep, ct);
+ cnet = nf_ct_pernet(net);
+
+ smp_mb__before_atomic();
+ atomic_dec(&cnet->count);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_free);
+
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ failed due to stress. Otherwise it really is unclassifiable. */
+static noinline struct nf_conntrack_tuple_hash *
+init_conntrack(struct net *net, struct nf_conn *tmpl,
+ const struct nf_conntrack_tuple *tuple,
+ struct sk_buff *skb,
+ unsigned int dataoff, u32 hash)
+{
+ struct nf_conn *ct;
+ struct nf_conn_help *help;
+ struct nf_conntrack_tuple repl_tuple;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_ecache *ecache;
+#endif
+ struct nf_conntrack_expect *exp = NULL;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conn_timeout *timeout_ext;
+ struct nf_conntrack_zone tmp;
+ struct nf_conntrack_net *cnet;
+
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
+ pr_debug("Can't invert tuple.\n");
+ return NULL;
+ }
+
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+ ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+ hash);
+ if (IS_ERR(ct))
+ return (struct nf_conntrack_tuple_hash *)ct;
+
+ if (!nf_ct_add_synproxy(ct, tmpl)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
+
+ if (timeout_ext)
+ nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
+ GFP_ATOMIC);
+
+ nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
+ nf_ct_labels_ext_add(ct);
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
+
+ if ((ecache || net->ct.sysctl_events) &&
+ !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+ ecache ? ecache->expmask : 0,
+ GFP_ATOMIC)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
+ }
+#endif
+
+ cnet = nf_ct_pernet(net);
+ if (cnet->expect_count) {
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = nf_ct_find_expectation(net, zone, tuple);
+ if (exp) {
+ pr_debug("expectation arrives ct=%p exp=%p\n",
+ ct, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
+ ct->master = exp->master;
+ if (exp->helper) {
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, exp->helper);
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ ct->mark = READ_ONCE(exp->master->mark);
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ ct->secmark = exp->master->secmark;
+#endif
+ NF_CT_STAT_INC(net, expect_new);
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ }
+ if (!exp && tmpl)
+ __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
+
+ /* Another CPU might have obtained a pointer to this object before it
+ * was released. Because refcount is 0, refcount_inc_not_zero() will fail.
+ *
+ * After refcount_set(1) it will succeed; ensure that zeroing of
+ * ct->status and the correct ct->net pointer are visible; else another
+ * core might observe the CONFIRMED bit, which means the entry is valid
+ * and in the hash table, but it's not (anymore).
+ */
+ smp_wmb();
+
+ /* Now it is going to be associated with an sk_buff, set refcount to 1. */
+ refcount_set(&ct->ct_general.use, 1);
+
+ if (exp) {
+ if (exp->expectfn)
+ exp->expectfn(ct, exp);
+ nf_ct_expect_put(exp);
+ }
+
+ return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
+}
+
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
+resolve_normal_ct(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ u_int8_t protonum,
+ const struct nf_hook_state *state)
+{
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conntrack_zone tmp;
+ u32 hash, zone_id, rid;
+ struct nf_conn *ct;
+
+ if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
+ dataoff, state->pf, protonum, state->net,
+ &tuple)) {
+ pr_debug("Can't get tuple\n");
+ return 0;
+ }
+
+ /* look for tuple match */
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+ zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ hash = hash_conntrack_raw(&tuple, zone_id, state->net);
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+ if (!h) {
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (zone_id != rid) {
+ u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+ }
+ }
+
+ if (!h) {
+ h = init_conntrack(state->net, tmpl, &tuple,
+ skb, dataoff, hash);
+ if (!h)
+ return 0;
+ if (IS_ERR(h))
+ return PTR_ERR(h);
+ }
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ /* It exists; we have (non-exclusive) reference. */
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
+ } else {
+ /* Once we've had two way comms, always ESTABLISHED. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ pr_debug("normal packet for %p\n", ct);
+ ctinfo = IP_CT_ESTABLISHED;
+ } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
+ pr_debug("related packet for %p\n", ct);
+ ctinfo = IP_CT_RELATED;
+ } else {
+ pr_debug("new packet for %p\n", ct);
+ ctinfo = IP_CT_NEW;
+ }
+ }
+ nf_ct_set(skb, ct, ctinfo);
+ return 0;
+}
+
+/*
+ * icmp packets need special treatment to handle error messages that are
+ * related to a connection.
+ *
+ * Callers need to check if the skb has a conntrack assigned when this
+ * helper returns; in that case the skb belongs to an already known connection.
+ */
+static unsigned int __cold
+nf_conntrack_handle_icmp(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ u8 protonum,
+ const struct nf_hook_state *state)
+{
+ int ret;
+
+ if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
+ ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
+ ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
+#endif
+ else
+ return NF_ACCEPT;
+
+ if (ret <= 0)
+ NF_CT_STAT_INC_ATOMIC(state->net, error);
+
+ return ret;
+}
+
+static int generic_packet(struct nf_conn *ct, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+ if (!timeout)
+ timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+ return NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int nf_conntrack_handle_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ return nf_conntrack_tcp_packet(ct, skb, dataoff,
+ ctinfo, state);
+ case IPPROTO_UDP:
+ return nf_conntrack_udp_packet(ct, skb, dataoff,
+ ctinfo, state);
+ case IPPROTO_ICMP:
+ return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE:
+ return nf_conntrack_udplite_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP:
+ return nf_conntrack_sctp_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP:
+ return nf_conntrack_dccp_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ return nf_conntrack_gre_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+ }
+
+ return generic_packet(ct, skb, ctinfo);
+}
+
+unsigned int
+nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct, *tmpl;
+ u_int8_t protonum;
+ int dataoff, ret;
+
+ tmpl = nf_ct_get(skb, &ctinfo);
+ if (tmpl || ctinfo == IP_CT_UNTRACKED) {
+ /* Previously seen (loopback or untracked)? Ignore. */
+ if ((tmpl && !nf_ct_is_template(tmpl)) ||
+ ctinfo == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
+ skb->_nfct = 0;
+ }
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
+ if (dataoff <= 0) {
+ pr_debug("not prepared to track yet or error occurred\n");
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
+
+ if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
+ ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
+ protonum, state);
+ if (ret <= 0) {
+ ret = -ret;
+ goto out;
+ }
+ /* ICMP[v6] protocol trackers may assign one conntrack. */
+ if (skb->_nfct)
+ goto out;
+ }
+repeat:
+ ret = resolve_normal_ct(tmpl, skb, dataoff,
+ protonum, state);
+ if (ret < 0) {
+ /* Too stressed to deal. */
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+ ret = NF_DROP;
+ goto out;
+ }
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ /* Not valid part of a connection */
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
+
+ ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
+ if (ret <= 0) {
+ /* Invalid: inverse of the return code tells
+ * the netfilter core what to do */
+ pr_debug("nf_conntrack_in: Can't track with proto module\n");
+ nf_ct_put(ct);
+ skb->_nfct = 0;
+ /* Special case: TCP tracker reports an attempt to reopen a
+ * closed/aborted connection. We have to go back and create a
+ * fresh conntrack.
+ */
+ if (ret == -NF_REPEAT)
+ goto repeat;
+
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ if (ret == -NF_DROP)
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
+ ret = -ret;
+ goto out;
+ }
+
+ if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_REPLY, ct);
+out:
+ if (tmpl)
+ nf_ct_put(tmpl);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_in);
+
+/* Alter reply tuple (maybe alter helper). This is for NAT, and is
+ implicitly racy: see __nf_conntrack_confirm */
+void nf_conntrack_alter_reply(struct nf_conn *ct,
+ const struct nf_conntrack_tuple *newreply)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+
+ /* Should be unconfirmed, so not in hash table yet */
+ WARN_ON(nf_ct_is_confirmed(ct));
+
+ pr_debug("Altering reply tuple of %p to ", ct);
+ nf_ct_dump_tuple(newreply);
+
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+ if (ct->master || (help && !hlist_empty(&help->expectations)))
+ return;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
+
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __nf_ct_refresh_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb,
+ u32 extra_jiffies,
+ bool do_acct)
+{
+ /* Only update if this is not a fixed timeout */
+ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ goto acct;
+
+ /* If not in hash table, timer will not be active yet */
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+
+ if (READ_ONCE(ct->timeout) != extra_jiffies)
+ WRITE_ONCE(ct->timeout, extra_jiffies);
+acct:
+ if (do_acct)
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+}
+EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
+
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
+{
+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+
+ return nf_ct_delete(ct, 0, 0);
+}
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/mutex.h>
+
+/* Generic function for tcp/udp/sctp/dccp and alike. */
+int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
+ nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
+
+const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
+ [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
+};
+EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
+
+int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t,
+ u_int32_t flags)
+{
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) {
+ if (!tb[CTA_PROTO_SRC_PORT])
+ return -EINVAL;
+
+ t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) {
+ if (!tb[CTA_PROTO_DST_PORT])
+ return -EINVAL;
+
+ t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+
+unsigned int nf_ct_port_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
+#endif
+
+/* Used by ipt_REJECT and ip6t_REJECT. */
+static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ /* This ICMP is in reverse direction to the packet which caused it */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ ctinfo = IP_CT_RELATED_REPLY;
+ else
+ ctinfo = IP_CT_RELATED;
+
+ /* Attach to new skbuff, and increment count */
+ nf_ct_set(nskb, ct, ctinfo);
+ nf_conntrack_get(skb_nfct(nskb));
+}
+
+static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ const struct nf_nat_hook *nat_hook;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+ unsigned int status;
+ int dataoff;
+ u16 l3num;
+ u8 l4num;
+
+ l3num = nf_ct_l3num(ct);
+
+ dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+ if (dataoff <= 0)
+ return -1;
+
+ if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
+ l4num, net, &tuple))
+ return -1;
+
+ if (ct->status & IPS_SRC_NAT) {
+ memcpy(tuple.src.u3.all,
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all,
+ sizeof(tuple.src.u3.all));
+ tuple.src.u.all =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all;
+ }
+
+ if (ct->status & IPS_DST_NAT) {
+ memcpy(tuple.dst.u3.all,
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all,
+ sizeof(tuple.dst.u3.all));
+ tuple.dst.u.all =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all;
+ }
+
+ h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
+ if (!h)
+ return 0;
+
+ /* Store status bits of the conntrack that is clashing to re-do NAT
+ * mangling according to what it has been done already to this packet.
+ */
+ status = ct->status;
+
+ nf_ct_put(ct);
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ nf_ct_set(skb, ct, ctinfo);
+
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (!nat_hook)
+ return 0;
+
+ if (status & IPS_SRC_NAT &&
+ nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
+ IP_CT_DIR_ORIGINAL) == NF_DROP)
+ return -1;
+
+ if (status & IPS_DST_NAT &&
+ nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
+ IP_CT_DIR_ORIGINAL) == NF_DROP)
+ return -1;
+
+ return 0;
+}
+
+/* This packet is coming from userspace via nf_queue; complete the packet
+ * processing after the helper invocation in nf_confirm().
+ */
+static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ const struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ int protoff;
+
+ help = nfct_help(ct);
+ if (!help)
+ return 0;
+
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return 0;
+
+ if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
+ return 0;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ protoff = skb_network_offset(skb) + ip_hdrlen(skb);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6: {
+ __be16 frag_off;
+ u8 pnum;
+
+ pnum = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ return 0;
+ break;
+ }
+#endif
+ default:
+ return 0;
+ }
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return -1;
+ }
+ }
+
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb) == NF_DROP ? -1 : 0;
+}
+
+static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ int err;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ err = __nf_conntrack_update(net, skb, ct, ctinfo);
+ if (err < 0)
+ return err;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ }
+
+ return nf_confirm_cthelper(skb, ct, ctinfo);
+}
+
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+{
+ const struct nf_conntrack_tuple *src_tuple;
+ const struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple srctuple;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+ memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+ return true;
+ }
+
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ NFPROTO_IPV4, dev_net(skb->dev),
+ &srctuple))
+ return false;
+
+ hash = nf_conntrack_find_get(dev_net(skb->dev),
+ &nf_ct_zone_dflt,
+ &srctuple);
+ if (!hash)
+ return false;
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+ memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+ nf_ct_put(ct);
+
+ return true;
+}
+
+/* Bring out ya dead! */
+static struct nf_conn *
+get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data, unsigned int *bucket)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct hlist_nulls_node *n;
+ spinlock_t *lockp;
+
+ for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+ if (hlist_nulls_empty(hslot))
+ continue;
+
+ lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
+ local_bh_disable();
+ nf_conntrack_lock(lockp);
+ hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+ continue;
+ /* All nf_conn objects are added to the hash table twice: once
+ * for the original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (iter_data->net &&
+ !net_eq(iter_data->net, nf_ct_net(ct)))
+ continue;
+
+ if (iter(ct, iter_data->data))
+ goto found;
+ }
+ spin_unlock(lockp);
+ local_bh_enable();
+ cond_resched();
+ }
+
+ return NULL;
+found:
+ refcount_inc(&ct->ct_general.use);
+ spin_unlock(lockp);
+ local_bh_enable();
+ return ct;
+}
+
+static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data)
+{
+ unsigned int bucket = 0;
+ struct nf_conn *ct;
+
+ might_sleep();
+
+ mutex_lock(&nf_conntrack_mutex);
+ while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) {
+ /* Time to push up daisies... */
+
+ nf_ct_delete(ct, iter_data->portid, iter_data->report);
+ nf_ct_put(ct);
+ cond_resched();
+ }
+ mutex_unlock(&nf_conntrack_mutex);
+}
+
+void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data)
+{
+ struct net *net = iter_data->net;
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ might_sleep();
+
+ if (atomic_read(&cnet->count) == 0)
+ return;
+
+ nf_ct_iterate_cleanup(iter, iter_data);
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
+
+/**
+ * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
+ * @iter: callback to invoke for each conntrack
+ * @data: data to pass to @iter
+ *
+ * Like nf_ct_iterate_cleanup, but first marks conntracks on the
+ * unconfirmed list as dying (so they will not be inserted into
+ * main table).
+ *
+ * Can only be called in module exit path.
+ */
+void
+nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
+{
+ struct nf_ct_iter_data iter_data = {};
+ struct net *net;
+
+ down_read(&net_rwsem);
+ for_each_net(net) {
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ if (atomic_read(&cnet->count) == 0)
+ continue;
+ nf_queue_nf_hook_drop(net);
+ }
+ up_read(&net_rwsem);
+
+ /* Need to wait for netns cleanup worker to finish, if it's
+ * running -- it might have deleted a net namespace from
+ * the global list, so hook drop above might not have
+ * affected all namespaces.
+ */
+ net_ns_barrier();
+
+ /* an skb with an unconfirmed conntrack could have been reinjected just
+ * before we called nf_queue_nf_hook_drop().
+ *
+ * This makes sure it's inserted into the conntrack table.
+ */
+ synchronize_net();
+
+ nf_ct_ext_bump_genid();
+ iter_data.data = data;
+ nf_ct_iterate_cleanup(iter, &iter_data);
+
+ /* Another cpu might be in a rcu read section with
+ * rcu protected pointer cleared in iter callback
+ * or hidden via nf_ct_ext_bump_genid() above.
+ *
+ * Wait until those are done.
+ */
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
+
+static int kill_all(struct nf_conn *i, void *data)
+{
+ return 1;
+}
+
+void nf_conntrack_cleanup_start(void)
+{
+ cleanup_nf_conntrack_bpf();
+ conntrack_gc_work.exiting = true;
+}
+
+void nf_conntrack_cleanup_end(void)
+{
+ RCU_INIT_POINTER(nf_ct_hook, NULL);
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+ kvfree(nf_conntrack_hash);
+
+ nf_conntrack_proto_fini();
+ nf_conntrack_helper_fini();
+ nf_conntrack_expect_fini();
+
+ kmem_cache_destroy(nf_conntrack_cachep);
+}
+
+/*
+ * Mishearing the voices in his head, our hero wonders how he's
+ * supposed to kill the mall.
+ */
+void nf_conntrack_cleanup_net(struct net *net)
+{
+ LIST_HEAD(single);
+
+ list_add(&net->exit_list, &single);
+ nf_conntrack_cleanup_net_list(&single);
+}
+
+void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
+{
+ struct nf_ct_iter_data iter_data = {};
+ struct net *net;
+ int busy;
+
+ /*
+ * This makes sure all current packets have passed through
+ * the netfilter framework. Roll on, two-stage module
+ * delete...
+ */
+ synchronize_net();
+i_see_dead_people:
+ busy = 0;
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ iter_data.net = net;
+ nf_ct_iterate_cleanup_net(kill_all, &iter_data);
+ if (atomic_read(&cnet->count) != 0)
+ busy = 1;
+ }
+ if (busy) {
+ schedule();
+ goto i_see_dead_people;
+ }
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_conntrack_ecache_pernet_fini(net);
+ nf_conntrack_expect_pernet_fini(net);
+ free_percpu(net->ct.stat);
+ }
+}
+
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
+{
+ struct hlist_nulls_head *hash;
+ unsigned int nr_slots, i;
+
+ if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
+ return NULL;
+
+ BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
+ nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+
+ hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL);
+
+ if (hash && nulls)
+ for (i = 0; i < nr_slots; i++)
+ INIT_HLIST_NULLS_HEAD(&hash[i], i);
+
+ return hash;
+}
+EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
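+
+/* The nulls terminators installed above encode the bucket index as
+ * NULLS_MARKER(i) == ((i << 1) | 1), which can never collide with a
+ * real (even) pointer. The lockless walkers in this file compare
+ * get_nulls_value() of the terminator they ended on against the bucket
+ * they started from to detect entries recycled onto another chain.
+ */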
+
+int nf_conntrack_hash_resize(unsigned int hashsize)
+{
+ int i, bucket;
+ unsigned int old_size;
+ struct hlist_nulls_head *hash, *old_hash;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+
+ if (!hashsize)
+ return -EINVAL;
+
+ hash = nf_ct_alloc_hashtable(&hashsize, 1);
+ if (!hash)
+ return -ENOMEM;
+
+ mutex_lock(&nf_conntrack_mutex);
+ old_size = nf_conntrack_htable_size;
+ if (old_size == hashsize) {
+ mutex_unlock(&nf_conntrack_mutex);
+ kvfree(hash);
+ return 0;
+ }
+
+ local_bh_disable();
+ nf_conntrack_all_lock();
+ write_seqcount_begin(&nf_conntrack_generation);
+
+ /* Lookups in the old hash might happen in parallel, which means we
+ * might get false negatives during connection lookup. New connections
+ * created because of a false negative won't make it into the hash,
+ * though, since that requires taking the locks.
+ */
+
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ unsigned int zone_id;
+
+ h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+ struct nf_conntrack_tuple_hash, hnnode);
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ hlist_nulls_del_rcu(&h->hnnode);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
+ bucket = __hash_conntrack(nf_ct_net(ct),
+ &h->tuple, zone_id, hashsize);
+ hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
+ }
+ }
+ old_hash = nf_conntrack_hash;
+
+ nf_conntrack_hash = hash;
+ nf_conntrack_htable_size = hashsize;
+
+ write_seqcount_end(&nf_conntrack_generation);
+ nf_conntrack_all_unlock();
+ local_bh_enable();
+
+ mutex_unlock(&nf_conntrack_mutex);
+
+ synchronize_net();
+ kvfree(old_hash);
+ return 0;
+}
+
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
+{
+ unsigned int hashsize;
+ int rc;
+
+ if (current->nsproxy->net_ns != &init_net)
+ return -EOPNOTSUPP;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!nf_conntrack_hash)
+ return param_set_uint(val, kp);
+
+ rc = kstrtouint(val, 0, &hashsize);
+ if (rc)
+ return rc;
+
+ return nf_conntrack_hash_resize(hashsize);
+}
+
+int nf_conntrack_init_start(void)
+{
+ unsigned long nr_pages = totalram_pages();
+ int max_factor = 8;
+ int ret = -ENOMEM;
+ int i;
+
+ seqcount_spinlock_init(&nf_conntrack_generation,
+ &nf_conntrack_locks_all_lock);
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_conntrack_locks[i]);
+
+ if (!nf_conntrack_htable_size) {
+ nf_conntrack_htable_size
+ = (((nr_pages << PAGE_SHIFT) / 16384)
+ / sizeof(struct hlist_head));
+ if (BITS_PER_LONG >= 64 &&
+ nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+ nf_conntrack_htable_size = 262144;
+ else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ nf_conntrack_htable_size = 65536;
+
+ if (nf_conntrack_htable_size < 1024)
+ nf_conntrack_htable_size = 1024;
+ /* Use a max. factor of one by default to keep the average
+ * hash chain length at 2 entries. Each entry has to be added
+ * twice (once for original direction, once for reply).
+ * When a table size is given we use the old value of 8 to
+ * avoid implicit reduction of the max entries setting.
+ */
+ max_factor = 1;
+ }
+
+ nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+ if (!nf_conntrack_hash)
+ return -ENOMEM;
+
+ nf_conntrack_max = max_factor * nf_conntrack_htable_size;
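+
+ /* Worked example of the sizing heuristic above: the table is sized
+ * to roughly 1/16384 of RAM. A 512 MiB machine gets
+ * 536870912 / 16384 = 32768 bytes of table, i.e. 32768 / 8 = 4096
+ * buckets on 64-bit, and with the default max_factor of 1,
+ * nf_conntrack_max = 4096 tracked entries. Boxes with more than
+ * 4 GiB of RAM (64-bit) are capped at 262144 buckets.
+ */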
+
+ nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+ sizeof(struct nf_conn),
+ NFCT_INFOMASK + 1,
+ SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+ if (!nf_conntrack_cachep)
+ goto err_cachep;
+
+ ret = nf_conntrack_expect_init();
+ if (ret < 0)
+ goto err_expect;
+
+ ret = nf_conntrack_helper_init();
+ if (ret < 0)
+ goto err_helper;
+
+ ret = nf_conntrack_proto_init();
+ if (ret < 0)
+ goto err_proto;
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
+
+ ret = register_nf_conntrack_bpf();
+ if (ret < 0)
+ goto err_kfunc;
+
+ return 0;
+
+err_kfunc:
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+ nf_conntrack_proto_fini();
+err_proto:
+ nf_conntrack_helper_fini();
+err_helper:
+ nf_conntrack_expect_fini();
+err_expect:
+ kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
+ kvfree(nf_conntrack_hash);
+ return ret;
+}
+
+static const struct nf_ct_hook nf_conntrack_hook = {
+ .update = nf_conntrack_update,
+ .destroy = nf_ct_destroy,
+ .get_tuple_skb = nf_conntrack_get_tuple_skb,
+ .attach = nf_conntrack_attach,
+};
+
+void nf_conntrack_init_end(void)
+{
+ RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
+}
+
+/*
+ * We need to use special "null" values, not used in the hash table
+ */
+#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
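+
+/* hlist_nulls chains end in an odd-encoded "nulls" marker instead of a
+ * NULL pointer, so a lockless reader can compare the marker it reached
+ * with the one it expected and restart if it hopped onto another chain.
+ * Using 1 << 30 keeps this marker (and DYING_NULLS_VAL in
+ * nf_conntrack_ecache.c) clear of the per-bucket markers, which encode
+ * the bucket index.
+ */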
+
+int nf_conntrack_init_net(struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ int ret = -ENOMEM;
+
+ BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
+ BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
+ atomic_set(&cnet->count, 0);
+
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ return ret;
+
+ ret = nf_conntrack_expect_pernet_init(net);
+ if (ret < 0)
+ goto err_expect;
+
+ nf_conntrack_acct_pernet_init(net);
+ nf_conntrack_tstamp_pernet_init(net);
+ nf_conntrack_ecache_pernet_init(net);
+ nf_conntrack_proto_pernet_init(net);
+
+ return 0;
+
+err_expect:
+ free_percpu(net->ct.stat);
+ return ret;
+}
+
+/* Code shared between ctnetlink and nf_conntrack_bpf */
+
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
+{
+ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ return -EPERM;
+
+ __nf_ct_set_timeout(ct, timeout);
+
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_timeout);
+
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off)
+{
+ unsigned int bit;
+
+	/* Ignore these unchangeable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_status);
+
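+/* Example (illustrative): if ct->status has IPS_ASSURED set and the
+ * requested status omits it, d & IPS_ASSURED is non-zero while the new
+ * status lacks the bit, so the request fails with -EBUSY: SEEN_REPLY and
+ * ASSURED can be set through this interface but never cleared.
+ */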
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status)
+{
+ unsigned long d;
+
+ d = ct->status ^ status;
+
+ if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+ /* unchangeable */
+ return -EBUSY;
+
+ if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+ /* SEEN_REPLY bit can only be set */
+ return -EBUSY;
+
+ if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+ /* ASSURED bit can only be set */
+ return -EBUSY;
+
+ __nf_ct_change_status(ct, status, 0);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
new file mode 100644
index 000000000..69948e1d6
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Event cache for netfilter. */
+
+/*
+ * (C) 2005 Harald Welte <laforge@gnumonks.org>
+ * (C) 2005 Patrick McHardy <kaber@trash.net>
+ * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
+
+#define DYING_NULLS_VAL ((1 << 30) + 1)
+#define ECACHE_MAX_JIFFIES msecs_to_jiffies(10)
+#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10)
+
+enum retry_state {
+ STATE_CONGESTED,
+ STATE_RESTART,
+ STATE_DONE,
+};
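+
+/* ecache_work() below maps these states back to scheduling decisions:
+ * STATE_CONGESTED retries after ECACHE_RETRY_JIFFIES, STATE_RESTART
+ * requeues the work immediately, and STATE_DONE leaves it idle until a
+ * destroy event fails to be delivered again.
+ */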
+
+struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ return &cnet->ecache;
+}
+#if IS_MODULE(CONFIG_NF_CT_NETLINK)
+EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache);
+#endif
+
+static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
+{
+ unsigned long stop = jiffies + ECACHE_MAX_JIFFIES;
+ struct hlist_nulls_head evicted_list;
+ enum retry_state ret = STATE_DONE;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ unsigned int sent;
+
+ INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL);
+
+next:
+ sent = 0;
+ spin_lock_bh(&cnet->ecache.dying_lock);
+
+ hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ /* The worker owns all entries, ct remains valid until nf_ct_put
+ * in the loop below.
+ */
+ if (nf_conntrack_event(IPCT_DESTROY, ct)) {
+ ret = STATE_CONGESTED;
+ break;
+ }
+
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list);
+
+ if (time_after(stop, jiffies)) {
+ ret = STATE_RESTART;
+ break;
+ }
+
+ if (sent++ > 16) {
+ spin_unlock_bh(&cnet->ecache.dying_lock);
+ cond_resched();
+ goto next;
+ }
+ }
+
+ spin_unlock_bh(&cnet->ecache.dying_lock);
+
+ hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
+ nf_ct_put(ct);
+
+ cond_resched();
+ }
+
+ return ret;
+}
+
+static void ecache_work(struct work_struct *work)
+{
+ struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+ int ret, delay = -1;
+
+ ret = ecache_work_evict_list(cnet);
+ switch (ret) {
+ case STATE_CONGESTED:
+ delay = ECACHE_RETRY_JIFFIES;
+ break;
+ case STATE_RESTART:
+ delay = 0;
+ break;
+ case STATE_DONE:
+ break;
+ }
+
+ if (delay >= 0)
+ schedule_delayed_work(&cnet->ecache.dwork, delay);
+}
+
+static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
+ const u32 events,
+ const u32 missed,
+ const struct nf_ct_event *item)
+{
+ struct net *net = nf_ct_net(item->ct);
+ struct nf_ct_event_notifier *notify;
+ u32 old, want;
+ int ret;
+
+ if (!((events | missed) & e->ctmask))
+ return 0;
+
+ rcu_read_lock();
+
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
+ if (!notify) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ ret = notify->ct_event(events | missed, item);
+ rcu_read_unlock();
+
+ if (likely(ret >= 0 && missed == 0))
+ return 0;
+
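+	/* Lock-free bookkeeping of undelivered events: on failure the new
+	 * events are added to e->missed, on success the redelivered ones
+	 * are cleared; the cmpxchg() loop retries if another CPU updated
+	 * e->missed meanwhile.
+	 */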
+ do {
+ old = READ_ONCE(e->missed);
+ if (ret < 0)
+ want = old | events;
+ else
+ want = old & ~missed;
+ } while (cmpxchg(&e->missed, old, want) != old);
+
+ return ret;
+}
+
+int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
+ u32 portid, int report)
+{
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ unsigned int missed;
+ int ret;
+
+ if (!nf_ct_is_confirmed(ct))
+ return 0;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e)
+ return 0;
+
+ memset(&item, 0, sizeof(item));
+
+ item.ct = ct;
+ item.portid = e->portid ? e->portid : portid;
+ item.report = report;
+
+	/* Is this a resend of a destroy event? If so, skip the missed events */
+ missed = e->portid ? 0 : e->missed;
+
+ ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
+ if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
+ /* This is a destroy event that has been triggered by a process,
+ * we store the PORTID to include it in the retransmission.
+ */
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
+
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
+{
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ unsigned int events;
+
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
+ return;
+
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ return;
+
+ events = xchg(&e->cache, 0);
+
+ item.ct = ct;
+ item.portid = 0;
+ item.report = 0;
+
+	/* We make a copy of the missed event cache without taking
+	 * the lock, so we may send missed events twice. However,
+	 * this does no harm and happens very rarely.
+	 */
+ __nf_conntrack_eventmask_report(e, events, e->missed, &item);
+}
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+
+void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+ struct nf_conntrack_expect *exp,
+ u32 portid, int report)
+
+{
+ struct net *net = nf_ct_exp_net(exp);
+ struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
+
+ rcu_read_lock();
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
+ if (!notify)
+ goto out_unlock;
+
+ e = nf_ct_ecache_find(exp->master);
+ if (!e)
+ goto out_unlock;
+
+ if (e->expmask & (1 << event)) {
+ struct nf_exp_event item = {
+ .exp = exp,
+ .portid = portid,
+ .report = report
+ };
+ notify->exp_event(1 << event, &item);
+ }
+out_unlock:
+ rcu_read_unlock();
+}
+
+void nf_conntrack_register_notifier(struct net *net,
+ const struct nf_ct_event_notifier *new)
+{
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
+ lockdep_is_held(&nf_ct_ecache_mutex));
+ WARN_ON_ONCE(notify);
+ rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+void nf_conntrack_unregister_notifier(struct net *net)
+{
+ mutex_lock(&nf_ct_ecache_mutex);
+ RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ /* synchronize_rcu() is called after netns pre_exit */
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ if (state == NFCT_ECACHE_DESTROY_FAIL &&
+ !delayed_work_pending(&cnet->ecache.dwork)) {
+ schedule_delayed_work(&cnet->ecache.dwork, HZ);
+ net->ct.ecache_dwork_pending = true;
+ } else if (state == NFCT_ECACHE_DESTROY_SENT) {
+ if (!hlist_nulls_empty(&cnet->ecache.dying_list))
+ mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
+ else
+ net->ct.ecache_dwork_pending = false;
+ }
+}
+
+bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *e;
+
+ switch (net->ct.sysctl_events) {
+ case 0:
+ /* assignment via template / ruleset? ignore sysctl. */
+ if (ctmask || expmask)
+ break;
+ return true;
+ case 2: /* autodetect: no event listener, don't allocate extension. */
+ if (!READ_ONCE(nf_ctnetlink_has_listener))
+ return true;
+ fallthrough;
+ case 1:
+ /* always allocate an extension. */
+ if (!ctmask && !expmask) {
+ ctmask = ~0;
+ expmask = ~0;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return true;
+ }
+
+ e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
+ if (e) {
+ e->ctmask = ctmask;
+ e->expmask = expmask;
+ }
+
+ return e != NULL;
+}
+EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add);
+
+#define NF_CT_EVENTS_DEFAULT 2
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+
+void nf_conntrack_ecache_pernet_init(struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ net->ct.sysctl_events = nf_ct_events;
+
+ INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
+ INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL);
+ spin_lock_init(&cnet->ecache.dying_lock);
+
+ BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
+}
+
+void nf_conntrack_ecache_pernet_fini(struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ cancel_delayed_work_sync(&cnet->ecache.dwork);
+}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
new file mode 100644
index 000000000..96948e98e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -0,0 +1,745 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Expectation handling for nf_conntrack. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/siphash.h>
+#include <linux/moduleparam.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/netns/hash.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+unsigned int nf_ct_expect_hsize __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
+
+unsigned int nf_ct_expect_max __read_mostly;
+
+static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static siphash_aligned_key_t nf_ct_expect_hashrnd;
+
+/* nf_conntrack_expect helper functions */
+void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
+ u32 portid, int report)
+{
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
+ struct nf_conntrack_net *cnet;
+
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
+
+ hlist_del_rcu(&exp->hnode);
+
+ cnet = nf_ct_pernet(net);
+ cnet->expect_count--;
+
+ hlist_del_rcu(&exp->lnode);
+ master_help->expecting[exp->class]--;
+
+ nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
+ nf_ct_expect_put(exp);
+
+ NF_CT_STAT_INC(net, expect_delete);
+}
+EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
+
+static void nf_ct_expectation_timed_out(struct timer_list *t)
+{
+ struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ nf_ct_unlink_expect(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_put(exp);
+}
+
+static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
+{
+ struct {
+ union nf_inet_addr dst_addr;
+ u32 net_mix;
+ u16 dport;
+ u8 l3num;
+ u8 protonum;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
+ u32 hash;
+
+ get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
+
+ memset(&combined, 0, sizeof(combined));
+
+ combined.dst_addr = tuple->dst.u3;
+ combined.net_mix = net_hash_mix(n);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.l3num = tuple->src.l3num;
+ combined.protonum = tuple->dst.protonum;
+
+ hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd);
+
+ return reciprocal_scale(hash, nf_ct_expect_hsize);
+}
+
+static bool
+nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_expect *i,
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
+{
+ return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+ net_eq(net, nf_ct_net(i->master)) &&
+ nf_ct_zone_equal_any(i->master, zone);
+}
+
+bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
+{
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
+
+struct nf_conntrack_expect *
+__nf_ct_expect_find(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ struct nf_conntrack_expect *i;
+ unsigned int h;
+
+ if (!cnet->expect_count)
+ return NULL;
+
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_equal(tuple, i, zone, net))
+ return i;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
+
+/* Just find an expectation corresponding to a tuple. */
+struct nf_conntrack_expect *
+nf_ct_expect_find_get(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i;
+
+ rcu_read_lock();
+ i = __nf_ct_expect_find(net, zone, tuple);
+ if (i && !refcount_inc_not_zero(&i->use))
+ i = NULL;
+ rcu_read_unlock();
+
+ return i;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
+
+/* If an expectation for this connection is found, it is deleted from the
+ * global list and then returned. */
+struct nf_conntrack_expect *
+nf_ct_find_expectation(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ struct nf_conntrack_expect *i, *exp = NULL;
+ unsigned int h;
+
+ if (!cnet->expect_count)
+ return NULL;
+
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
+ if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
+ nf_ct_exp_equal(tuple, i, zone, net)) {
+ exp = i;
+ break;
+ }
+ }
+ if (!exp)
+ return NULL;
+
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (!nf_ct_is_confirmed(exp->master))
+ return NULL;
+
+ /* Avoid race with other CPUs, that for exp->master ct, is
+ * about to invoke ->destroy(), or nf_ct_delete() via timeout
+ * or early_drop().
+ *
+ * The refcount_inc_not_zero() check tells: If that fails, we
+ * know that the ct is being destroyed. If it succeeds, we
+ * can be sure the ct cannot disappear underneath.
+ */
+ if (unlikely(nf_ct_is_dying(exp->master) ||
+ !refcount_inc_not_zero(&exp->master->ct_general.use)))
+ return NULL;
+
+ if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ refcount_inc(&exp->use);
+ return exp;
+ } else if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ return exp;
+ }
+ /* Undo exp->master refcnt increase, if del_timer() failed */
+ nf_ct_put(exp->master);
+
+ return NULL;
+}
+
+/* delete all expectations for this conntrack */
+void nf_ct_remove_expectations(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *next;
+
+	/* Optimization: most connections never expect any others. */
+ if (!help)
+ return;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
+ nf_ct_remove_expect(exp);
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
+
+/* Would two expected things clash? */
+static inline int expect_clash(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b)
+{
+ /* Part covered by intersection of masks must be unequal,
+ otherwise they clash */
+ struct nf_conntrack_tuple_mask intersect_mask;
+ int count;
+
+ intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
+
+ for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
+ intersect_mask.src.u3.all[count] =
+ a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
+ }
+
+ return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
+}
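+
+/* Clash example (illustrative): two expectations with identical
+ * destinations whose source masks are 10.0.0.0/8 and 10.1.0.0/16
+ * intersect to /8; source addresses 10.2.3.4 and 10.5.6.7 are equal
+ * under that mask, so the expectations clash.
+ */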
+
+static inline int expect_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b)
+{
+ return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
+}
+
+static bool master_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b,
+ unsigned int flags)
+{
+ if (flags & NF_CT_EXP_F_SKIP_MASTER)
+ return true;
+
+ return a->master == b->master;
+}
+
+/* Generally a bad idea to call this: could have matched already. */
+void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
+{
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ nf_ct_remove_expect(exp);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
+
+/* We don't increase the master conntrack refcount for non-fulfilled
+ * expectations. During conntrack destruction, the expectations are
+ * always killed before the conntrack itself. */
+struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
+{
+ struct nf_conntrack_expect *new;
+
+ new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
+ if (!new)
+ return NULL;
+
+ new->master = me;
+ refcount_set(&new->use, 1);
+ return new;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
+
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
+ u_int8_t family,
+ const union nf_inet_addr *saddr,
+ const union nf_inet_addr *daddr,
+ u_int8_t proto, const __be16 *src, const __be16 *dst)
+{
+ int len;
+
+ if (family == AF_INET)
+ len = 4;
+ else
+ len = 16;
+
+ exp->flags = 0;
+ exp->class = class;
+ exp->expectfn = NULL;
+ exp->helper = NULL;
+ exp->tuple.src.l3num = family;
+ exp->tuple.dst.protonum = proto;
+
+ if (saddr) {
+ memcpy(&exp->tuple.src.u3, saddr, len);
+ if (sizeof(exp->tuple.src.u3) > len)
+ /* address needs to be cleared for nf_ct_tuple_equal */
+ memset((void *)&exp->tuple.src.u3 + len, 0x00,
+ sizeof(exp->tuple.src.u3) - len);
+ memset(&exp->mask.src.u3, 0xFF, len);
+ if (sizeof(exp->mask.src.u3) > len)
+ memset((void *)&exp->mask.src.u3 + len, 0x00,
+ sizeof(exp->mask.src.u3) - len);
+ } else {
+ memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
+ memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
+ }
+
+ if (src) {
+ exp->tuple.src.u.all = *src;
+ exp->mask.src.u.all = htons(0xFFFF);
+ } else {
+ exp->tuple.src.u.all = 0;
+ exp->mask.src.u.all = 0;
+ }
+
+ memcpy(&exp->tuple.dst.u3, daddr, len);
+ if (sizeof(exp->tuple.dst.u3) > len)
+ /* address needs to be cleared for nf_ct_tuple_equal */
+ memset((void *)&exp->tuple.dst.u3 + len, 0x00,
+ sizeof(exp->tuple.dst.u3) - len);
+
+ exp->tuple.dst.u.all = *dst;
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+ memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
+ memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_init);
+
+static void nf_ct_expect_free_rcu(struct rcu_head *head)
+{
+ struct nf_conntrack_expect *exp;
+
+ exp = container_of(head, struct nf_conntrack_expect, rcu);
+ kmem_cache_free(nf_ct_expect_cachep, exp);
+}
+
+void nf_ct_expect_put(struct nf_conntrack_expect *exp)
+{
+ if (refcount_dec_and_test(&exp->use))
+ call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_put);
+
+static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+{
+ struct nf_conntrack_net *cnet;
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct nf_conntrack_helper *helper;
+ struct net *net = nf_ct_exp_net(exp);
+ unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
+
+	/* two references: one for the hash insert, one for the timer */
+ refcount_add(2, &exp->use);
+
+ timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ if (helper) {
+ exp->timeout.expires = jiffies +
+ helper->expect_policy[exp->class].timeout * HZ;
+ }
+ add_timer(&exp->timeout);
+
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ cnet = nf_ct_pernet(net);
+ cnet->expect_count++;
+
+ NF_CT_STAT_INC(net, expect_create);
+}
+
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct nf_conn *master,
+ struct nf_conntrack_expect *new)
+{
+ struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_expect *exp, *last = NULL;
+
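+	/* nf_ct_expect_insert() adds entries at the head of the helper's
+	 * list, so the last matching entry found below is the oldest one.
+	 */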
+ hlist_for_each_entry(exp, &master_help->expectations, lnode) {
+ if (exp->class == new->class)
+ last = exp;
+ }
+
+ if (last)
+ nf_ct_remove_expect(last);
+}
+
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int flags)
+{
+ const struct nf_conntrack_expect_policy *p;
+ struct nf_conntrack_expect *i;
+ struct nf_conntrack_net *cnet;
+ struct nf_conn *master = expect->master;
+ struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_helper *helper;
+ struct net *net = nf_ct_exp_net(expect);
+ struct hlist_node *next;
+ unsigned int h;
+ int ret = 0;
+
+ if (!master_help) {
+ ret = -ESHUTDOWN;
+ goto out;
+ }
+ h = nf_ct_expect_dst_hash(net, &expect->tuple);
+ hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
+ if (master_matches(i, expect, flags) &&
+ expect_matches(i, expect)) {
+ if (i->class != expect->class ||
+ i->master != expect->master)
+ return -EALREADY;
+
+ if (nf_ct_remove_expect(i))
+ break;
+ } else if (expect_clash(i, expect)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+ /* Will be over limit? */
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ if (helper) {
+ p = &helper->expect_policy[expect->class];
+ if (p->max_expected &&
+ master_help->expecting[expect->class] >= p->max_expected) {
+ evict_oldest_expect(master, expect);
+ if (master_help->expecting[expect->class]
+ >= p->max_expected) {
+ ret = -EMFILE;
+ goto out;
+ }
+ }
+ }
+
+ cnet = nf_ct_pernet(net);
+ if (cnet->expect_count >= nf_ct_expect_max) {
+ net_warn_ratelimited("nf_conntrack: expectation table full\n");
+ ret = -EMFILE;
+ }
+out:
+ return ret;
+}
+
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+ u32 portid, int report, unsigned int flags)
+{
+ int ret;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ ret = __nf_ct_expect_check(expect, flags);
+ if (ret < 0)
+ goto out;
+
+ nf_ct_expect_insert(expect);
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
+ return 0;
+out:
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+struct ct_expect_iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
+{
+ struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
+
+ for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+ n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
+ if (n)
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+ struct hlist_node *head)
+{
+ struct ct_expect_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_next_rcu(head));
+ while (head == NULL) {
+ if (++st->bucket >= nf_ct_expect_hsize)
+ return NULL;
+ head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_node *head = ct_expect_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_expect_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_expect_get_idx(seq, *pos);
+}
+
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_expect_get_next(seq, v);
+}
+
+static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *expect;
+ struct nf_conntrack_helper *helper;
+ struct hlist_node *n = v;
+ char *delim = "";
+
+ expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
+
+ if (expect->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&expect->timeout)
+ ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_puts(s, "- ");
+ seq_printf(s, "l3proto = %u proto=%u ",
+ expect->tuple.src.l3num,
+ expect->tuple.dst.protonum);
+ print_tuple(s, &expect->tuple,
+ nf_ct_l4proto_find(expect->tuple.dst.protonum));
+
+ if (expect->flags & NF_CT_EXPECT_PERMANENT) {
+ seq_puts(s, "PERMANENT");
+ delim = ",";
+ }
+ if (expect->flags & NF_CT_EXPECT_INACTIVE) {
+ seq_printf(s, "%sINACTIVE", delim);
+ delim = ",";
+ }
+ if (expect->flags & NF_CT_EXPECT_USERSPACE)
+ seq_printf(s, "%sUSERSPACE", delim);
+
+ helper = rcu_dereference(nfct_help(expect->master)->helper);
+ if (helper) {
+ seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
+ if (helper->expect_policy[expect->class].name[0])
+ seq_printf(s, "/%s",
+ helper->expect_policy[expect->class].name);
+ }
+
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static const struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
+
+static int exp_proc_init(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+
+ proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
+ &exp_seq_ops, sizeof(struct ct_expect_iter_state));
+ if (!proc)
+ return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
+ return 0;
+}
+
+static void exp_proc_remove(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ remove_proc_entry("nf_conntrack_expect", net->proc_net);
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
+}
+
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
+
+int nf_conntrack_expect_pernet_init(struct net *net)
+{
+ return exp_proc_init(net);
+}
+
+void nf_conntrack_expect_pernet_fini(struct net *net)
+{
+ exp_proc_remove(net);
+}
+
+int nf_conntrack_expect_init(void)
+{
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ sizeof(struct nf_conntrack_expect),
+ 0, 0, NULL);
+ if (!nf_ct_expect_cachep)
+ return -ENOMEM;
+
+ nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
+ if (!nf_ct_expect_hash) {
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void nf_conntrack_expect_fini(void)
+{
+ rcu_barrier(); /* Wait for call_rcu() before destroy */
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ kvfree(nf_ct_expect_hash);
+}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
new file mode 100644
index 000000000..dd62cc12e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Structure dynamic extension infrastructure
+ * Copyright (C) 2004 Rusty Russell IBM Corporation
+ * Copyright (C) 2007 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2007 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ */
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
+#include <net/netfilter/nf_nat.h>
+
+#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
+
+atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1);
+
+static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
+ [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help),
+#if IS_ENABLED(CONFIG_NF_NAT)
+ [NF_CT_EXT_NAT] = sizeof(struct nf_conn_nat),
+#endif
+ [NF_CT_EXT_SEQADJ] = sizeof(struct nf_conn_seqadj),
+ [NF_CT_EXT_ACCT] = sizeof(struct nf_conn_acct),
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ [NF_CT_EXT_SYNPROXY] = sizeof(struct nf_conn_synproxy),
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ [NF_CT_EXT_ACT_CT] = sizeof(struct nf_conn_act_ct_ext),
+#endif
+};
+
+static __always_inline unsigned int total_extension_size(void)
+{
+ /* remember to add new extensions below */
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
+
+ return sizeof(struct nf_ct_ext) +
+ sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + sizeof(struct nf_conn_nat)
+#endif
+ + sizeof(struct nf_conn_seqadj)
+ + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ + sizeof(struct nf_conn_synproxy)
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ + sizeof(struct nf_conn_act_ct_ext)
+#endif
+ ;
+}
+
+void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
+{
+ unsigned int newlen, newoff, oldlen, alloc;
+ struct nf_ct_ext *new;
+
+ /* Conntrack must not be confirmed to avoid races on reallocation. */
+ WARN_ON(nf_ct_is_confirmed(ct));
+
+ /* struct nf_ct_ext uses u8 to store offsets/size */
+ BUILD_BUG_ON(total_extension_size() > 255u);
+
+ if (ct->ext) {
+ const struct nf_ct_ext *old = ct->ext;
+
+ if (__nf_ct_ext_exist(old, id))
+ return NULL;
+ oldlen = old->len;
+ } else {
+ oldlen = sizeof(*new);
+ }
+
+ newoff = ALIGN(oldlen, __alignof__(struct nf_ct_ext));
+ newlen = newoff + nf_ct_ext_type_len[id];
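+	/* e.g. (illustrative): with oldlen = 45 and an 8-byte-aligned
+	 * struct nf_ct_ext, newoff = 48 and the new extension occupies
+	 * bytes [48, newlen) of the reallocated area.
+	 */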
+
+ alloc = max(newlen, NF_CT_EXT_PREALLOC);
+ new = krealloc(ct->ext, alloc, gfp);
+ if (!new)
+ return NULL;
+
+ if (!ct->ext) {
+ memset(new->offset, 0, sizeof(new->offset));
+ new->gen_id = atomic_read(&nf_conntrack_ext_genid);
+ }
+
+ new->offset[id] = newoff;
+ new->len = newlen;
+ memset((void *)new + newoff, 0, newlen - newoff);
+
+ ct->ext = new;
+ return (void *)new + newoff;
+}
+EXPORT_SYMBOL(nf_ct_ext_add);
+
+/* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. */
+void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id)
+{
+ unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid);
+ unsigned int this_id = READ_ONCE(ext->gen_id);
+
+ if (!__nf_ct_ext_exist(ext, id))
+ return NULL;
+
+ if (this_id == 0 || ext->gen_id == gen_id)
+ return (void *)ext + ext->offset[id];
+
+ return NULL;
+}
+EXPORT_SYMBOL(__nf_ct_ext_find);
+
+void nf_ct_ext_bump_genid(void)
+{
+ unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid);
+
+ if (value == UINT_MAX)
+ atomic_set(&nf_conntrack_ext_genid, 1);
+
+ msleep(HZ);
+}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
new file mode 100644
index 000000000..617f744a2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -0,0 +1,604 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* FTP extension for connection tracking. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/ipv6.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+#define HELPER_NAME "ftp"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp connection tracking helper");
+MODULE_ALIAS("ip_conntrack_ftp");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+static DEFINE_SPINLOCK(nf_ftp_lock);
+
+#define MAX_PORTS 8
+static u_int16_t ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+
+static bool loose;
+module_param(loose, bool, 0600);
+
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ enum nf_ct_ftp_type type,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
+
+static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+static int try_eprt(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+
+static struct ftp_search {
+ const char *pattern;
+ size_t plen;
+ char skip;
+ char term;
+ enum nf_ct_ftp_type ftptype;
+ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *);
+} search[IP_CT_DIR_MAX][2] = {
+ [IP_CT_DIR_ORIGINAL] = {
+ {
+ .pattern = "PORT",
+ .plen = sizeof("PORT") - 1,
+ .skip = ' ',
+ .term = '\r',
+ .ftptype = NF_CT_FTP_PORT,
+ .getnum = try_rfc959,
+ },
+ {
+ .pattern = "EPRT",
+ .plen = sizeof("EPRT") - 1,
+ .skip = ' ',
+ .term = '\r',
+ .ftptype = NF_CT_FTP_EPRT,
+ .getnum = try_eprt,
+ },
+ },
+ [IP_CT_DIR_REPLY] = {
+ {
+ .pattern = "227 ",
+ .plen = sizeof("227 ") - 1,
+ .ftptype = NF_CT_FTP_PASV,
+ .getnum = try_rfc1123,
+ },
+ {
+ .pattern = "229 ",
+ .plen = sizeof("229 ") - 1,
+ .skip = '(',
+ .term = ')',
+ .ftptype = NF_CT_FTP_EPSV,
+ .getnum = try_epsv_response,
+ },
+ },
+};
+
+static int
+get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
+{
+ const char *end;
+ int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end);
+ if (ret > 0)
+ return (int)(end - src);
+ return 0;
+}
+
+static int try_number(const char *data, size_t dlen, u_int32_t array[],
+ int array_size, char sep, char term)
+{
+ u_int32_t i, len;
+
+ memset(array, 0, sizeof(array[0])*array_size);
+
+ /* Keep data pointing at next char. */
+ for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
+ if (*data >= '0' && *data <= '9') {
+ array[i] = array[i]*10 + *data - '0';
+ }
+ else if (*data == sep)
+ i++;
+ else {
+ /* Unexpected character; true if it's the
+ terminator (or we don't care about one)
+ and we're finished. */
+ if ((*data == term || !term) && i == array_size - 1)
+ return len;
+
+ pr_debug("Char %u (got %u nums) `%u' unexpected\n",
+ len, i, *data);
+ return 0;
+ }
+ }
+ pr_debug("Failed to fill %u numbers separated by %c\n",
+ array_size, sep);
+ return 0;
+}
+
+/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
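+/* Example (illustrative): "192,168,1,1,5,6" decodes to address
+ * 192.168.1.1 and port 5 * 256 + 6 = 1286. */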
+static int try_rfc959(const char *data, size_t dlen,
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
+{
+ int length;
+ u_int32_t array[6];
+
+ length = try_number(data, dlen, array, 6, ',', term);
+ if (length == 0)
+ return 0;
+
+ cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
+ (array[2] << 8) | array[3]);
+ cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
+ return length;
+}
+
+/*
+ * From RFC 1123:
+ * The format of the 227 reply to a PASV command is not
+ * well standardized. In particular, an FTP client cannot
+ * assume that the parentheses shown on page 40 of RFC-959
+ * will be present (and in fact, Figure 3 on page 43 omits
+ * them). Therefore, a User-FTP program that interprets
+ * the PASV reply must scan the reply for the first digit
+ * of the host and port numbers.
+ */
+static int try_rfc1123(const char *data, size_t dlen,
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
+{
+ int i;
+ for (i = 0; i < dlen; i++)
+ if (isdigit(data[i]))
+ break;
+
+ if (i == dlen)
+ return 0;
+
+ *offset += i;
+
+ return try_rfc959(data + i, dlen - i, cmd, 0, offset);
+}
+
+/* Grab port: number up to delimiter */
+static int get_port(const char *data, int start, size_t dlen, char delim,
+ __be16 *port)
+{
+ u_int16_t tmp_port = 0;
+ int i;
+
+ for (i = start; i < dlen; i++) {
+ /* Finished? */
+ if (data[i] == delim) {
+ if (tmp_port == 0)
+ break;
+ *port = htons(tmp_port);
+ pr_debug("get_port: return %d\n", tmp_port);
+ return i + 1;
+ }
+ else if (data[i] >= '0' && data[i] <= '9')
+ tmp_port = tmp_port*10 + data[i] - '0';
+ else { /* Some other crap */
+ pr_debug("get_port: invalid char.\n");
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
+static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
+ char term, unsigned int *offset)
+{
+ char delim;
+ int length;
+
+ /* First character is delimiter, then "1" for IPv4 or "2" for IPv6,
+ then delimiter again. */
+ if (dlen <= 3) {
+ pr_debug("EPRT: too short\n");
+ return 0;
+ }
+ delim = data[0];
+ if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
+ pr_debug("try_eprt: invalid delimiter.\n");
+ return 0;
+ }
+
+ if ((cmd->l3num == PF_INET && data[1] != '1') ||
+ (cmd->l3num == PF_INET6 && data[1] != '2')) {
+ pr_debug("EPRT: invalid protocol number.\n");
+ return 0;
+ }
+
+ pr_debug("EPRT: Got %c%c%c\n", delim, data[1], delim);
+
+ if (data[1] == '1') {
+ u_int32_t array[4];
+
+ /* Now we have IP address. */
+ length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
+ if (length != 0)
+ cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16)
+ | (array[2] << 8) | array[3]);
+ } else {
+ /* Now we have IPv6 address. */
+ length = get_ipv6_addr(data + 3, dlen - 3,
+ (struct in6_addr *)cmd->u3.ip6, delim);
+ }
+
+ if (length == 0)
+ return 0;
+ pr_debug("EPRT: Got IP address!\n");
+ /* Start offset includes initial "|1|", and trailing delimiter */
+ return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
+}
+
+/* Returns 0, or length of numbers: |||6446| */
+static int try_epsv_response(const char *data, size_t dlen,
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
+{
+ char delim;
+
+ /* Three delimiters. */
+ if (dlen <= 3) return 0;
+ delim = data[0];
+ if (isdigit(delim) || delim < 33 || delim > 126 ||
+ data[1] != delim || data[2] != delim)
+ return 0;
+
+ return get_port(data, 3, dlen, delim, &cmd->u.tcp.port);
+}
+
+/* Return 1 for match, 0 for accept, -1 for partial. */
+static int find_pattern(const char *data, size_t dlen,
+ const char *pattern, size_t plen,
+ char skip, char term,
+ unsigned int *numoff,
+ unsigned int *numlen,
+ struct nf_conntrack_man *cmd,
+ int (*getnum)(const char *, size_t,
+ struct nf_conntrack_man *, char,
+ unsigned int *))
+{
+ size_t i = plen;
+
+ pr_debug("find_pattern `%s': dlen = %zu\n", pattern, dlen);
+
+ if (dlen <= plen) {
+ /* Short packet: try for partial? */
+ if (strncasecmp(data, pattern, dlen) == 0)
+ return -1;
+ else return 0;
+ }
+
+ if (strncasecmp(data, pattern, plen) != 0)
+ return 0;
+
+ pr_debug("Pattern matches!\n");
+ /* Now we've found the constant string, try to skip
+ to the 'skip' character */
+ if (skip) {
+ for (i = plen; data[i] != skip; i++)
+ if (i == dlen - 1) return -1;
+
+ /* Skip over the last character */
+ i++;
+ }
+
+ pr_debug("Skipped up to 0x%hhx delimiter!\n", skip);
+
+ *numoff = i;
+ *numlen = getnum(data + i, dlen - i, cmd, term, numoff);
+ if (!*numlen)
+ return -1;
+
+ pr_debug("Match succeeded!\n");
+ return 1;
+}
+
+/* Look up to see if we're just after a \n. */
+static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
+{
+ unsigned int i;
+
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
+ if (info->seq_aft_nl[dir][i] == seq)
+ return 1;
+ return 0;
+}
+
+/* We don't update if it's older than what we have. */
+static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
+ struct nf_ct_ftp_master *info, int dir,
+ struct sk_buff *skb)
+{
+ unsigned int i, oldest;
+
+ /* Look for oldest: if we find exact match, we're done. */
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
+ if (info->seq_aft_nl[dir][i] == nl_seq)
+ return;
+ }
+
+ if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
+ info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+ } else {
+ if (before(info->seq_aft_nl[dir][0], info->seq_aft_nl[dir][1]))
+ oldest = 0;
+ else
+ oldest = 1;
+
+ if (after(nl_seq, info->seq_aft_nl[dir][oldest]))
+ info->seq_aft_nl[dir][oldest] = nl_seq;
+ }
+}
+
+static int help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ const char *fb_ptr;
+ int ret;
+ u32 seq;
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int matchlen, matchoff;
+ struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct);
+ struct nf_conntrack_expect *exp;
+ union nf_inet_addr *daddr;
+ struct nf_conntrack_man cmd = {};
+ unsigned int i;
+ int found = 0, ends_in_nl;
+ typeof(nf_nat_ftp_hook) nf_nat_ftp;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) {
+ pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
+ return NF_ACCEPT;
+ }
+
+ if (unlikely(skb_linearize(skb)))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ dataoff = protoff + th->doff * 4;
+ /* No data? */
+ if (dataoff >= skb->len) {
+ pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
+ skb->len);
+ return NF_ACCEPT;
+ }
+ datalen = skb->len - dataoff;
+
+ /* seqadj (nat) uses ct->lock internally, nf_nat_ftp would cause deadlock */
+ spin_lock_bh(&nf_ftp_lock);
+ fb_ptr = skb->data + dataoff;
+
+ ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+ seq = ntohl(th->seq) + datalen;
+
+ /* Look up to see if we're just after a \n. */
+ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+		/* We're picking this up, clear the flag and let it continue */
+ if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) {
+ ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP;
+ goto skip_nl_seq;
+ }
+
+ /* Now if this ends in \n, update ftp info. */
+ pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
+ ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
+ ct_ftp_info->seq_aft_nl[dir][0],
+ ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
+ ct_ftp_info->seq_aft_nl[dir][1]);
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+skip_nl_seq:
+ /* Initialize IP/IPv6 addr to expected address (it's not mentioned
+ in EPSV responses) */
+ cmd.l3num = nf_ct_l3num(ct);
+ memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+ sizeof(cmd.u3.all));
+
+ for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
+ found = find_pattern(fb_ptr, datalen,
+ search[dir][i].pattern,
+ search[dir][i].plen,
+ search[dir][i].skip,
+ search[dir][i].term,
+ &matchoff, &matchlen,
+ &cmd,
+ search[dir][i].getnum);
+ if (found) break;
+ }
+ if (found == -1) {
+ /* We don't usually drop packets. After all, this is
+ connection tracking, not packet filtering.
+ However, it is necessary for accurate tracking in
+ this case. */
+ nf_ct_helper_log(skb, ct, "partial matching of `%s'",
+ search[dir][i].pattern);
+ ret = NF_DROP;
+ goto out;
+ } else if (found == 0) { /* No match */
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+ pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
+ matchlen, fb_ptr + matchoff,
+ matchlen, ntohl(th->seq) + matchoff);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ nf_ct_helper_log(skb, ct, "cannot alloc expectation");
+ ret = NF_DROP;
+ goto out;
+ }
+
+ /* We refer to the reverse direction ("!dir") tuples here,
+ * because we're expecting something in the other direction.
+ * Doesn't matter unless NAT is happening. */
+ daddr = &ct->tuplehash[!dir].tuple.dst.u3;
+
+ /* Update the ftp info */
+ if ((cmd.l3num == nf_ct_l3num(ct)) &&
+ memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+ sizeof(cmd.u3.all))) {
+ /* Enrico Scholz's passive FTP to partially RNAT'd ftp
+ server: it really wants us to connect to a
+ different IP address. Simply don't record it for
+ NAT. */
+ if (cmd.l3num == PF_INET) {
+ pr_debug("NOT RECORDING: %pI4 != %pI4\n",
+ &cmd.u3.ip,
+ &ct->tuplehash[dir].tuple.src.u3.ip);
+ } else {
+ pr_debug("NOT RECORDING: %pI6 != %pI6\n",
+ cmd.u3.ip6,
+ ct->tuplehash[dir].tuple.src.u3.ip6);
+ }
+
+ /* Thanks to Cristiano Lincoln Mattos
+ <lincoln@cesar.org.br> for reporting this potential
+ problem (DMZ machines opening holes to internal
+ networks, or the packet filter itself). */
+ if (!loose) {
+ ret = NF_ACCEPT;
+ goto out_put_expect;
+ }
+ daddr = &cmd.u3;
+ }
+
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, cmd.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3, daddr,
+ IPPROTO_TCP, NULL, &cmd.u.tcp.port);
+
+ /* Now, NAT might want to mangle the packet, and register the
+ * (possibly changed) expectation itself. */
+ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
+ if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
+ protoff, matchoff, matchlen, exp);
+ else {
+ /* Can't expect this? Best to drop packet now. */
+ if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct, "cannot add expectation");
+ ret = NF_DROP;
+ } else
+ ret = NF_ACCEPT;
+ }
+
+out_put_expect:
+ nf_ct_expect_put(exp);
+
+out_update_nl:
+ /* Now if this ends in \n, update ftp info. Seq may have been
+ * adjusted by NAT code. */
+ if (ends_in_nl)
+ update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
+ out:
+ spin_unlock_bh(&nf_ftp_lock);
+ return ret;
+}
+
+static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
+{
+ struct nf_ct_ftp_master *ftp = nfct_help_data(ct);
+
+ /* This conntrack has been injected from user-space, always pick up
+ * sequence tracking. Otherwise, the first FTP command after the
+ * failover breaks.
+ */
+ ftp->flags[IP_CT_DIR_ORIGINAL] |= NF_CT_FTP_SEQ_PICKUP;
+ ftp->flags[IP_CT_DIR_REPLY] |= NF_CT_FTP_SEQ_PICKUP;
+ return 0;
+}
+
+static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly;
+
+static const struct nf_conntrack_expect_policy ftp_exp_policy = {
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+static void __exit nf_conntrack_ftp_fini(void)
+{
+ nf_conntrack_helpers_unregister(ftp, ports_c * 2);
+}
+
+static int __init nf_conntrack_ftp_init(void)
+{
+ int i, ret = 0;
+
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
+
+ if (ports_c == 0)
+ ports[ports_c++] = FTP_PORT;
+
+ /* FIXME should be configurable whether IPv4 and IPv6 FTP connections
+ are tracked or not - YK */
+ for (i = 0; i < ports_c; i++) {
+ nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP,
+ HELPER_NAME, FTP_PORT, ports[i], ports[i],
+ &ftp_exp_policy, 0, help,
+ nf_ct_ftp_from_nlattr, THIS_MODULE);
+ nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP,
+ HELPER_NAME, FTP_PORT, ports[i], ports[i],
+ &ftp_exp_policy, 0, help,
+ nf_ct_ftp_from_nlattr, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+module_init(nf_conntrack_ftp_init);
+module_exit(nf_conntrack_ftp_fini);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
new file mode 100644
index 000000000..e697a824b
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * BER and PER decoding library for H.323 conntrack/NAT module.
+ *
+ * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * See nf_conntrack_helper_h323_asn1.h for details.
+ */
+
+#ifdef __KERNEL__
+#include <linux/kernel.h>
+#else
+#include <stdio.h>
+#endif
+#include <linux/netfilter/nf_conntrack_h323_asn1.h>
+
+/* Trace Flag */
+#ifndef H323_TRACE
+#define H323_TRACE 0
+#endif
+
+#if H323_TRACE
+#define TAB_SIZE 4
+#define IFTHEN(cond, act) if(cond){act;}
+#ifdef __KERNEL__
+#define PRINT printk
+#else
+#define PRINT printf
+#endif
+#define FNAME(name) name,
+#else
+#define IFTHEN(cond, act)
+#define PRINT(fmt, args...)
+#define FNAME(name)
+#endif
+
+/* ASN.1 Types */
+#define NUL 0
+#define BOOL 1
+#define OID 2
+#define INT 3
+#define ENUM 4
+#define BITSTR 5
+#define NUMSTR 6
+#define NUMDGT 6
+#define TBCDSTR 6
+#define OCTSTR 7
+#define PRTSTR 7
+#define IA5STR 7
+#define GENSTR 7
+#define BMPSTR 8
+#define SEQ 9
+#define SET 9
+#define SEQOF 10
+#define SETOF 10
+#define CHOICE 11
+
+/* Constraint Types */
+#define FIXD 0
+/* #define BITS 1-8 */
+#define BYTE 9
+#define WORD 10
+#define CONS 11
+#define SEMI 12
+#define UNCO 13
+
+/* ASN.1 Type Attributes */
+#define SKIP 0
+#define STOP 1
+#define DECODE 2
+#define EXT 4
+#define OPEN 8
+#define OPT 16
+
+
+/* ASN.1 Field Structure */
+typedef struct field_t {
+#if H323_TRACE
+ char *name;
+#endif
+ unsigned char type;
+ unsigned char sz;
+ unsigned char lb;
+ unsigned char ub;
+ unsigned short attr;
+ unsigned short offset;
+ const struct field_t *fields;
+} field_t;
+
+/* Bit Stream */
+struct bitstr {
+ unsigned char *buf;
+ unsigned char *beg;
+ unsigned char *end;
+ unsigned char *cur;
+ unsigned int bit;
+};
+
+/* Tool Functions */
+#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
+#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
+#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
+static unsigned int get_len(struct bitstr *bs);
+static unsigned int get_bit(struct bitstr *bs);
+static unsigned int get_bits(struct bitstr *bs, unsigned int b);
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
+static unsigned int get_uint(struct bitstr *bs, int b);
+
+/* Decoder Functions */
+static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
+
+/* Decoder Functions Vector */
+typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
+static const decoder_t Decoders[] = {
+ decode_nul,
+ decode_bool,
+ decode_oid,
+ decode_int,
+ decode_enum,
+ decode_bitstr,
+ decode_numstr,
+ decode_octstr,
+ decode_bmpstr,
+ decode_seq,
+ decode_seqof,
+ decode_choice,
+};
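+
+/*
+ * Dispatch sketch (illustrative only): a field's ->type member indexes
+ * this table, so the composite decoders recurse with
+ * Decoders[son->type](bs, son, base, level + 1) for each child field.
+ */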
+
+/*
+ * H.323 Types
+ */
+#include "nf_conntrack_h323_types.c"
+
+/*
+ * Functions
+ */
+
+/* Assumes bs is byte-aligned and the decoded value is < 16384 */
+static unsigned int get_len(struct bitstr *bs)
+{
+ unsigned int v;
+
+ v = *bs->cur++;
+
+ if (v & 0x80) {
+ v &= 0x3f;
+ v <<= 8;
+ v += *bs->cur++;
+ }
+
+ return v;
+}
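+
+/*
+ * Worked example (illustrative only): the length determinant 0x2a
+ * decodes to 42 (single-byte form).  The pair 0x81 0x02 has the top bit
+ * set, so the low six bits of the first byte are kept and the result is
+ * (0x01 << 8) + 0x02 == 258 (two-byte form, values up to 16383).
+ */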
+
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+ bits += bs->bit;
+ bytes += bits / BITS_PER_BYTE;
+ if (bits % BITS_PER_BYTE > 0)
+ bytes++;
+
+ if (bs->cur + bytes > bs->end)
+ return 1;
+
+ return 0;
+}
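+
+/*
+ * Worked example (illustrative only): with bs->bit == 3, a request for
+ * one byte plus seven bits becomes 10 pending bits, i.e. 1 + 10/8 == 2
+ * whole bytes plus a remainder, rounded up to 3 bytes that must still
+ * fit between bs->cur and bs->end.
+ */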
+
+static unsigned int get_bit(struct bitstr *bs)
+{
+ unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
+
+ INC_BIT(bs);
+
+ return b;
+}
+
+/* Assume b <= 8 */
+static unsigned int get_bits(struct bitstr *bs, unsigned int b)
+{
+ unsigned int v, l;
+
+ v = (*bs->cur) & (0xffU >> bs->bit);
+ l = b + bs->bit;
+
+ if (l < 8) {
+ v >>= 8 - l;
+ bs->bit = l;
+ } else if (l == 8) {
+ bs->cur++;
+ bs->bit = 0;
+ } else { /* l > 8 */
+
+ v <<= 8;
+ v += *(++bs->cur);
+ v >>= 16 - l;
+ bs->bit = l - 8;
+ }
+
+ return v;
+}
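+
+/*
+ * Worked example (illustrative only): with bs->bit == 5 and the bytes
+ * 0xad 0xca under the cursor, get_bits(bs, 6) masks the low three bits
+ * of 0xad (0b101), appends 0xca and shifts right by 16 - 11 == 5,
+ * yielding 0b101110 == 46 and leaving bs->bit == 3.
+ */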
+
+/* Assume b <= 32 */
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
+{
+ unsigned int v, l, shift, bytes;
+
+ if (!b)
+ return 0;
+
+ l = bs->bit + b;
+
+ if (l < 8) {
+ v = (unsigned int)(*bs->cur) << (bs->bit + 24);
+ bs->bit = l;
+ } else if (l == 8) {
+ v = (unsigned int)(*bs->cur++) << (bs->bit + 24);
+ bs->bit = 0;
+ } else {
+ for (bytes = l >> 3, shift = 24, v = 0; bytes;
+ bytes--, shift -= 8)
+ v |= (unsigned int)(*bs->cur++) << shift;
+
+ if (l < 32) {
+ v |= (unsigned int)(*bs->cur) << shift;
+ v <<= bs->bit;
+ } else if (l > 32) {
+ v <<= bs->bit;
+ v |= (*bs->cur) >> (8 - bs->bit);
+ }
+
+ bs->bit = l & 0x7;
+ }
+
+ v &= 0xffffffff << (32 - b);
+
+ return v;
+}
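+
+/*
+ * Worked example (illustrative only): the bitmap is returned
+ * left-aligned in a 32-bit word.  With bs->bit == 0 and *bs->cur ==
+ * 0xb1, get_bitmap(bs, 5) yields 0xb0000000: the top five bits 10110,
+ * with everything below bit 27 masked off.
+ */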
+
+/*
+ * Assume bs is aligned and sizeof(unsigned int) == 4
+ */
+static unsigned int get_uint(struct bitstr *bs, int b)
+{
+ unsigned int v = 0;
+
+ switch (b) {
+ case 4:
+ v |= *bs->cur++;
+ v <<= 8;
+ fallthrough;
+ case 3:
+ v |= *bs->cur++;
+ v <<= 8;
+ fallthrough;
+ case 2:
+ v |= *bs->cur++;
+ v <<= 8;
+ fallthrough;
+ case 1:
+ v |= *bs->cur++;
+ break;
+ }
+ return v;
+}
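+
+/*
+ * Worked example (illustrative only): get_uint(bs, 3) over the bytes
+ * 0x01 0x02 0x03 falls through the switch cases, accumulating
+ * big-endian: ((0x01 << 8 | 0x02) << 8) | 0x03 == 0x010203.
+ */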
+
+static int decode_nul(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ return H323_ERROR_NONE;
+}
+
+static int decode_bool(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ INC_BIT(bs);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_oid(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+
+ len = *bs->cur++;
+ bs->cur += len;
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+
+ return H323_ERROR_NONE;
+}
+
+static int decode_int(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ bs->cur++;
+ break;
+ case WORD: /* 257 <= Range <= 64K */
+ BYTE_ALIGN(bs);
+ bs->cur += 2;
+ break;
+ case CONS: /* 64K < Range < 4G */
+ if (nf_h323_error_boundary(bs, 0, 2))
+ return H323_ERROR_BOUND;
+ len = get_bits(bs, 2) + 1;
+ BYTE_ALIGN(bs);
+ if (base && (f->attr & DECODE)) { /* timeToLive */
+ unsigned int v = get_uint(bs, len) + f->lb;
+ PRINT(" = %u", v);
+ *((unsigned int *)(base + f->offset)) = v;
+ }
+ bs->cur += len;
+ break;
+ case UNCO:
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ bs->cur += len;
+ break;
+ default: /* 2 <= Range <= 255 */
+ INC_BITS(bs, f->sz);
+ break;
+ }
+
+ PRINT("\n");
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_enum(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ if ((f->attr & EXT) && get_bit(bs)) {
+ INC_BITS(bs, 7);
+ } else {
+ INC_BITS(bs, f->sz);
+ }
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ BYTE_ALIGN(bs);
+ switch (f->sz) {
+ case FIXD: /* fixed length > 16 */
+ len = f->lb;
+ break;
+ case WORD: /* 2-byte length */
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = (*bs->cur++) << 8;
+ len += (*bs->cur++) + f->lb;
+ break;
+ case SEMI:
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ break;
+ default:
+ len = 0;
+ break;
+ }
+
+ bs->cur += len >> 3;
+ bs->bit = len & 7;
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_numstr(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ len = get_bits(bs, f->sz) + f->lb;
+
+ BYTE_ALIGN(bs);
+ INC_BITS(bs, (len << 2));
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_octstr(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case FIXD: /* Range == 1 */
+ if (f->lb > 2) {
+ BYTE_ALIGN(bs);
+ if (base && (f->attr & DECODE)) {
+ /* The IP Address */
+ IFTHEN(f->lb == 4,
+ PRINT(" = %d.%d.%d.%d:%d",
+ bs->cur[0], bs->cur[1],
+ bs->cur[2], bs->cur[3],
+ bs->cur[4] * 256 + bs->cur[5]));
+ *((unsigned int *)(base + f->offset)) =
+ bs->cur - bs->buf;
+ }
+ }
+ len = f->lb;
+ break;
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+ len = (*bs->cur++) + f->lb;
+ break;
+ case SEMI:
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs) + f->lb;
+ break;
+ default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ len = get_bits(bs, f->sz) + f->lb;
+ BYTE_ALIGN(bs);
+ break;
+ }
+
+ bs->cur += len;
+
+ PRINT("\n");
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+ len = (*bs->cur++) + f->lb;
+ break;
+ default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ len = get_bits(bs, f->sz) + f->lb;
+ BYTE_ALIGN(bs);
+ break;
+ }
+
+ bs->cur += len << 1;
+
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
+ return H323_ERROR_NONE;
+}
+
+static int decode_seq(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
+ int err;
+ const struct field_t *son;
+ unsigned char *beg = NULL;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* Decode? */
+ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+ /* Extensible? */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
+ ext = (f->attr & EXT) ? get_bit(bs) : 0;
+
+ /* Get fields bitmap */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ bmp = get_bitmap(bs, f->sz);
+ if (base)
+ *(unsigned int *)base = bmp;
+
+ /* Decode the root components */
+ for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) {
+ if (son->attr & STOP) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ return H323_ERROR_STOP;
+ }
+
+ if (son->attr & OPT) { /* Optional component */
+			if (!((0x80000000U >> (opt++)) & bmp)) /* Not present */
+ continue;
+ }
+
+ /* Decode */
+ if (son->attr & OPEN) { /* Open field */
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
+ " ", son->name);
+ bs->cur += len;
+ continue;
+ }
+ beg = bs->cur;
+
+ /* Decode */
+ if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ bs->cur = beg + len;
+ bs->bit = 0;
+ } else if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+ }
+
+ /* No extension? */
+ if (!ext)
+ return H323_ERROR_NONE;
+
+ /* Get the extension bitmap */
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
+ bmp2_len = get_bits(bs, 7) + 1;
+ if (nf_h323_error_boundary(bs, 0, bmp2_len))
+ return H323_ERROR_BOUND;
+ bmp2 = get_bitmap(bs, bmp2_len);
+ bmp |= bmp2 >> f->sz;
+ if (base)
+ *(unsigned int *)base = bmp;
+ BYTE_ALIGN(bs);
+
+ /* Decode the extension components */
+ for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
+ /* Check Range */
+ if (i >= f->ub) { /* Newer Version? */
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ bs->cur += len;
+ continue;
+ }
+
+ if (son->attr & STOP) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ return H323_ERROR_STOP;
+ }
+
+ if (!((0x80000000 >> opt) & bmp2)) /* Not present */
+ continue;
+
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ bs->cur += len;
+ continue;
+ }
+ beg = bs->cur;
+
+ if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ bs->cur = beg + len;
+ bs->bit = 0;
+ }
+ return H323_ERROR_NONE;
+}
+
+static int decode_seqof(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int count, effective_count = 0, i, len = 0;
+ int err;
+ const struct field_t *son;
+ unsigned char *beg = NULL;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* Decode? */
+ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+ /* Decode item count */
+ switch (f->sz) {
+ case BYTE:
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+ count = *bs->cur++;
+ break;
+ case WORD:
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ count = *bs->cur++;
+ count <<= 8;
+ count += *bs->cur++;
+ break;
+ case SEMI:
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ count = get_len(bs);
+ break;
+ default:
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ count = get_bits(bs, f->sz);
+ break;
+ }
+ count += f->lb;
+
+ /* Write Count */
+ if (base) {
+ effective_count = count > f->ub ? f->ub : count;
+ *(unsigned int *)base = effective_count;
+ base += sizeof(unsigned int);
+ }
+
+ /* Decode nested field */
+ son = f->fields;
+ if (base)
+ base -= son->offset;
+ for (i = 0; i < count; i++) {
+ if (son->attr & OPEN) {
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
+ " ", son->name);
+ bs->cur += len;
+ continue;
+ }
+ beg = bs->cur;
+
+ if ((err = (Decoders[son->type]) (bs, son,
+ i <
+ effective_count ?
+ base : NULL,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ bs->cur = beg + len;
+ bs->bit = 0;
+ } else
+ if ((err = (Decoders[son->type]) (bs, son,
+ i <
+ effective_count ?
+ base : NULL,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ if (base)
+ base += son->offset;
+ }
+
+ return H323_ERROR_NONE;
+}
+
+static int decode_choice(struct bitstr *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int type, ext, len = 0;
+ int err;
+ const struct field_t *son;
+ unsigned char *beg = NULL;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* Decode? */
+ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+ /* Decode the choice index number */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
+ if ((f->attr & EXT) && get_bit(bs)) {
+ ext = 1;
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
+ type = get_bits(bs, 7) + f->lb;
+ } else {
+ ext = 0;
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
+ type = get_bits(bs, f->sz);
+ if (type >= f->lb)
+ return H323_ERROR_RANGE;
+ }
+
+ /* Write Type */
+ if (base)
+ *(unsigned int *)base = type;
+
+ /* Check Range */
+ if (type >= f->ub) { /* Newer version? */
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ bs->cur += len;
+ return H323_ERROR_NONE;
+ }
+
+ /* Transfer to son level */
+ son = &f->fields[type];
+ if (son->attr & STOP) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name);
+ return H323_ERROR_STOP;
+ }
+
+ if (ext || (son->attr & OPEN)) {
+ BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ len = get_len(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ bs->cur += len;
+ return H323_ERROR_NONE;
+ }
+ beg = bs->cur;
+
+ if ((err = (Decoders[son->type]) (bs, son, base, level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ bs->cur = beg + len;
+ bs->bit = 0;
+ } else if ((err = (Decoders[son->type]) (bs, son, base, level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ return H323_ERROR_NONE;
+}
+
+int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
+{
+ static const struct field_t ras_message = {
+ FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
+ 0, _RasMessage
+ };
+ struct bitstr bs;
+
+ bs.buf = bs.beg = bs.cur = buf;
+ bs.end = buf + sz;
+ bs.bit = 0;
+
+ return decode_choice(&bs, &ras_message, (char *) ras, 0);
+}
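+
+/*
+ * Usage sketch (illustrative only; the buffer names are hypothetical):
+ * a caller holding one RAS PDU in a linear buffer decodes it as
+ *
+ *	RasMessage ras;
+ *
+ *	if (DecodeRasMessage(data, datalen, &ras) < H323_ERROR_NONE)
+ *		return;
+ *
+ * where a negative return is H323_ERROR_BOUND or H323_ERROR_RANGE.
+ * This mirrors how the RAS helper in this module consumes the UDP
+ * payload.
+ */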
+
+static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
+ size_t sz, H323_UserInformation *uuie)
+{
+ static const struct field_t h323_userinformation = {
+ FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
+ 0, _H323_UserInformation
+ };
+ struct bitstr bs;
+
+ bs.buf = buf;
+ bs.beg = bs.cur = beg;
+ bs.end = beg + sz;
+ bs.bit = 0;
+
+ return decode_seq(&bs, &h323_userinformation, (char *) uuie, 0);
+}
+
+int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
+ MultimediaSystemControlMessage *
+ mscm)
+{
+ static const struct field_t multimediasystemcontrolmessage = {
+ FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
+ DECODE | EXT, 0, _MultimediaSystemControlMessage
+ };
+ struct bitstr bs;
+
+ bs.buf = bs.beg = bs.cur = buf;
+ bs.end = buf + sz;
+ bs.bit = 0;
+
+ return decode_choice(&bs, &multimediasystemcontrolmessage,
+ (char *) mscm, 0);
+}
+
+int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
+{
+ unsigned char *p = buf;
+ int len;
+
+ if (!p || sz < 1)
+ return H323_ERROR_BOUND;
+
+ /* Protocol Discriminator */
+ if (*p != 0x08) {
+ PRINT("Unknown Protocol Discriminator\n");
+ return H323_ERROR_RANGE;
+ }
+ p++;
+ sz--;
+
+ /* CallReferenceValue */
+ if (sz < 1)
+ return H323_ERROR_BOUND;
+ len = *p++;
+ sz--;
+ if (sz < len)
+ return H323_ERROR_BOUND;
+ p += len;
+ sz -= len;
+
+ /* Message Type */
+ if (sz < 2)
+ return H323_ERROR_BOUND;
+ q931->MessageType = *p++;
+ sz--;
+ PRINT("MessageType = %02X\n", q931->MessageType);
+ if (*p & 0x80) {
+ p++;
+ sz--;
+ }
+
+ /* Decode Information Elements */
+ while (sz > 0) {
+ if (*p == 0x7e) { /* UserUserIE */
+ if (sz < 3)
+ break;
+ p++;
+ len = *p++ << 8;
+ len |= *p++;
+ sz -= 3;
+ if (sz < len)
+ break;
+ p++;
+ len--;
+ return DecodeH323_UserInformation(buf, p, len,
+ &q931->UUIE);
+ }
+ p++;
+ sz--;
+ if (sz < 1)
+ break;
+ len = *p++;
+ sz--;
+ if (sz < len)
+ break;
+ p += len;
+ sz -= len;
+ }
+
+ PRINT("Q.931 UUIE not found\n");
+
+ return H323_ERROR_BOUND;
+}
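+
+/*
+ * Worked example (illustrative only): the information-element sequence
+ * 0x7e 0x00 0x05 0x05 <4 bytes> is a UserUser IE (0x7e) with a two-byte
+ * length of 5; the first content byte (the H.323 protocol
+ * discriminator) is skipped and the remaining 4 bytes are handed to
+ * DecodeH323_UserInformation() as the UUIE.
+ */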
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
new file mode 100644
index 000000000..5a9bce24f
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -0,0 +1,1803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * H.323 connection tracking helper
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the 'brute force' H.323 connection tracking module by
+ * Jozsef Kadlecsik <kadlec@netfilter.org>
+ *
+ * For more information, please see http://nath323.sourceforge.net/
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+#define H323_MAX_SIZE 65535
+
+/* Parameters */
+static unsigned int default_rrq_ttl __read_mostly = 300;
+module_param(default_rrq_ttl, uint, 0600);
+MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
+
+static int gkrouted_only __read_mostly = 1;
+module_param(gkrouted_only, int, 0600);
+MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
+
+static bool callforward_filter __read_mostly = true;
+module_param(callforward_filter, bool, 0600);
+MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
+ "if both endpoints are on different sides "
+ "(determined by routing information)");
+
+const struct nfct_h323_nat_hooks __rcu *nfct_h323_nat_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfct_h323_nat_hook);
+
+static DEFINE_SPINLOCK(nf_h323_lock);
+static char *h323_buffer;
+
+static struct nf_conntrack_helper nf_conntrack_helper_h245;
+static struct nf_conntrack_helper nf_conntrack_helper_q931[];
+static struct nf_conntrack_helper nf_conntrack_helper_ras[];
+
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned char **data, int *datalen, int *dataoff)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ int tcpdatalen;
+ int tcpdataoff;
+ unsigned char *tpkt;
+ int tpktlen;
+ int tpktoff;
+
+ /* Get TCP header */
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return 0;
+
+ /* Get TCP data offset */
+ tcpdataoff = protoff + th->doff * 4;
+
+ /* Get TCP data length */
+ tcpdatalen = skb->len - tcpdataoff;
+ if (tcpdatalen <= 0) /* No TCP data */
+ goto clear_out;
+
+ if (tcpdatalen > H323_MAX_SIZE)
+ tcpdatalen = H323_MAX_SIZE;
+
+ if (*data == NULL) { /* first TPKT */
+ /* Get first TPKT pointer */
+ tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
+ h323_buffer);
+ if (!tpkt)
+ goto clear_out;
+
+ /* Validate TPKT identifier */
+ if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
+			/* NetMeeting sends the TPKT header and data separately */
+ if (info->tpkt_len[dir] > 0) {
+ pr_debug("nf_ct_h323: previous packet "
+ "indicated separate TPKT data of %hu "
+ "bytes\n", info->tpkt_len[dir]);
+ if (info->tpkt_len[dir] <= tcpdatalen) {
+					/* Yes, a TPKT header was
+					 * received earlier */
+ *data = tpkt;
+ *datalen = info->tpkt_len[dir];
+ *dataoff = 0;
+ goto out;
+ }
+
+ /* Fragmented TPKT */
+ pr_debug("nf_ct_h323: fragmented TPKT\n");
+ goto clear_out;
+ }
+
+ /* It is not even a TPKT */
+ return 0;
+ }
+ tpktoff = 0;
+ } else { /* Next TPKT */
+ tpktoff = *dataoff + *datalen;
+ tcpdatalen -= tpktoff;
+ if (tcpdatalen <= 4) /* No more TPKT */
+ goto clear_out;
+ tpkt = *data + *datalen;
+
+ /* Validate TPKT identifier */
+ if (tpkt[0] != 0x03 || tpkt[1] != 0)
+ goto clear_out;
+ }
+
+ /* Validate TPKT length */
+ tpktlen = tpkt[2] * 256 + tpkt[3];
+ if (tpktlen < 4)
+ goto clear_out;
+ if (tpktlen > tcpdatalen) {
+ if (tcpdatalen == 4) { /* Separate TPKT header */
+			/* NetMeeting sends the TPKT header and data separately */
+ pr_debug("nf_ct_h323: separate TPKT header indicates "
+ "there will be TPKT data of %d bytes\n",
+ tpktlen - 4);
+ info->tpkt_len[dir] = tpktlen - 4;
+ return 0;
+ }
+
+ pr_debug("nf_ct_h323: incomplete TPKT (fragmented?)\n");
+ goto clear_out;
+ }
+
+ /* This is the encapsulated data */
+ *data = tpkt + 4;
+ *datalen = tpktlen - 4;
+ *dataoff = tpktoff + 4;
+
+ out:
+ /* Clear TPKT length */
+ info->tpkt_len[dir] = 0;
+ return 1;
+
+ clear_out:
+ info->tpkt_len[dir] = 0;
+ return 0;
+}
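+
+/*
+ * Worked example (illustrative only): a TPKT starts 0x03 0x00 <len_hi>
+ * <len_lo>, the length covering the 4-byte header itself.  The bytes
+ * 0x03 0x00 0x00 0x1c therefore announce a 28-byte TPKT carrying 24
+ * bytes of encapsulated H.225/H.245 payload, returned through *data and
+ * *datalen with *dataoff pointing just past the header.
+ */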
+
+static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
+ H245_TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 *port)
+{
+ const unsigned char *p;
+ int len;
+
+ if (taddr->choice != eH245_TransportAddress_unicastAddress)
+ return 0;
+
+ switch (taddr->unicastAddress.choice) {
+ case eUnicastAddress_iPAddress:
+ if (nf_ct_l3num(ct) != AF_INET)
+ return 0;
+ p = data + taddr->unicastAddress.iPAddress.network;
+ len = 4;
+ break;
+ case eUnicastAddress_iP6Address:
+ if (nf_ct_l3num(ct) != AF_INET6)
+ return 0;
+ p = data + taddr->unicastAddress.iP6Address.network;
+ len = 16;
+ break;
+ default:
+ return 0;
+ }
+
+ memcpy(addr, p, len);
+ memset((void *)addr + len, 0, sizeof(*addr) - len);
+ memcpy(port, p + len, sizeof(__be16));
+
+ return 1;
+}
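+
+/*
+ * Layout note (illustrative only): the ASN.1 decoder records only the
+ * offset of the binary address within the TPKT payload, so for IPv4 the
+ * bytes at *data + offset are a.b.c.d followed by the 16-bit port in
+ * network order, which get_h245_addr() copies into *addr and *port.
+ */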
+
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ __be16 rtp_port, rtcp_port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *rtp_exp;
+ struct nf_conntrack_expect *rtcp_exp;
+
+ /* Read RTP or RTCP address */
+ if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* RTP port is even */
+ rtp_port = port & ~htons(1);
+ rtcp_port = port | htons(1);
+
+ /* Create expect for RTP */
+ if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(rtp_exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtp_port);
+
+ /* Create expect for RTCP */
+ if ((rtcp_exp = nf_ct_expect_alloc(ct)) == NULL) {
+ nf_ct_expect_put(rtp_exp);
+ return -1;
+ }
+ nf_ct_expect_init(rtcp_exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtcp_port);
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ nathook &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nathook->nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
+ taddr, port, rtp_port, rtp_exp, rtcp_exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+ if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
+ pr_debug("nf_ct_h323: expect RTP ");
+ nf_ct_dump_tuple(&rtp_exp->tuple);
+ pr_debug("nf_ct_h323: expect RTCP ");
+ nf_ct_dump_tuple(&rtcp_exp->tuple);
+ } else {
+ nf_ct_unexpect_related(rtp_exp);
+ ret = -1;
+ }
+ } else
+ ret = -1;
+ }
+
+ nf_ct_expect_put(rtp_exp);
+ nf_ct_expect_put(rtcp_exp);
+
+ return ret;
+}
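+
+/*
+ * Worked example (illustrative only): RTP conventionally uses the even
+ * port and RTCP the adjacent odd one, so for a signalled port of 5001
+ * the masking above yields rtp_port == htons(5000) and rtcp_port ==
+ * htons(5001), and both UDP flows are expected from the reply
+ * direction.
+ */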
+
+static int expect_t120(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ /* Read T.120 address */
+ if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* Create expect for T.120 connections */
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ nathook &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nathook->nat_t120(skb, ct, ctinfo, protoff, data,
+ dataoff, taddr, port, exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_h323: expect T.120 ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
+static int process_h245_channel(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ H2250LogicalChannelParameters *channel)
+{
+ int ret;
+
+ if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
+ /* RTP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
+ &channel->mediaChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (channel->
+ options & eH2250LogicalChannelParameters_mediaControlChannel) {
+ /* RTCP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
+ &channel->mediaControlChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ OpenLogicalChannel *olc)
+{
+ int ret;
+
+ pr_debug("nf_ct_h323: OpenLogicalChannel\n");
+
+ if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
+ eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
+ {
+ ret = process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &olc->
+ forwardLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olc->options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters) &&
+ (olc->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
+ && (olc->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret =
+ process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &olc->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olc->options & eOpenLogicalChannel_separateStack) &&
+ olc->forwardLogicalChannelParameters.dataType.choice ==
+ eDataType_data &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ choice == eDataApplicationCapability_application_t120 &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ t120.choice == eDataProtocolCapability_separateLANStack &&
+ olc->separateStack.networkAddress.choice ==
+ eNetworkAccessParameters_networkAddress_localAreaAddress) {
+ ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
+ &olc->separateStack.networkAddress.
+ localAreaAddress);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ OpenLogicalChannelAck *olca)
+{
+ H2250LogicalChannelAckParameters *ack;
+ int ret;
+
+ pr_debug("nf_ct_h323: OpenLogicalChannelAck\n");
+
+ if ((olca->options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
+ (olca->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
+ && (olca->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret = process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &olca->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olca->options &
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
+ (olca->forwardMultiplexAckParameters.choice ==
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
+ {
+ ack = &olca->forwardMultiplexAckParameters.
+ h2250LogicalChannelAckParameters;
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaChannel) {
+ /* RTP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &ack->mediaChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaControlChannel) {
+ /* RTCP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &ack->mediaControlChannel);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
+ olca->separateStack.networkAddress.choice ==
+ eNetworkAccessParameters_networkAddress_localAreaAddress) {
+ ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
+ &olca->separateStack.networkAddress.
+ localAreaAddress);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ MultimediaSystemControlMessage *mscm)
+{
+ switch (mscm->choice) {
+ case eMultimediaSystemControlMessage_request:
+ if (mscm->request.choice ==
+ eRequestMessage_openLogicalChannel) {
+ return process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &mscm->request.openLogicalChannel);
+ }
+ pr_debug("nf_ct_h323: H.245 Request %d\n",
+ mscm->request.choice);
+ break;
+ case eMultimediaSystemControlMessage_response:
+ if (mscm->response.choice ==
+ eResponseMessage_openLogicalChannelAck) {
+ return process_olca(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &mscm->response.
+ openLogicalChannelAck);
+ }
+ pr_debug("nf_ct_h323: H.245 Response %d\n",
+ mscm->response.choice);
+ break;
+ default:
+ pr_debug("nf_ct_h323: H.245 signal %d\n", mscm->choice);
+ break;
+ }
+
+ return 0;
+}
+
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ static MultimediaSystemControlMessage mscm;
+ unsigned char *data = NULL;
+ int datalen;
+ int dataoff;
+ int ret;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+ pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
+
+ spin_lock_bh(&nf_h323_lock);
+
+ /* Process each TPKT */
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
+ &data, &datalen, &dataoff)) {
+ pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
+ nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+ /* Decode H.245 signal */
+ ret = DecodeMultimediaSystemControlMessage(data, datalen,
+ &mscm);
+ if (ret < 0) {
+ pr_debug("nf_ct_h245: decoding error: %s\n",
+ ret == H323_ERROR_BOUND ?
+ "out of bound" : "out of range");
+			/* Don't drop the packet on a decoding error */
+ break;
+ }
+
+ /* Process H.245 signal */
+ if (process_h245(skb, ct, ctinfo, protoff,
+ &data, dataoff, &mscm) < 0)
+ goto drop;
+ }
+
+ spin_unlock_bh(&nf_h323_lock);
+ return NF_ACCEPT;
+
+ drop:
+ spin_unlock_bh(&nf_h323_lock);
+ nf_ct_helper_log(skb, ct, "cannot process H.245 message");
+ return NF_DROP;
+}
+
+static const struct nf_conntrack_expect_policy h245_exp_policy = {
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
+ .timeout = 240,
+};
+
+static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
+ .name = "H.245",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_UNSPEC,
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .help = h245_help,
+ .expect_policy = &h245_exp_policy,
+};
+
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+ TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 *port)
+{
+ const unsigned char *p;
+ int len;
+
+ switch (taddr->choice) {
+ case eTransportAddress_ipAddress:
+ if (nf_ct_l3num(ct) != AF_INET)
+ return 0;
+ p = data + taddr->ipAddress.ip;
+ len = 4;
+ break;
+ case eTransportAddress_ip6Address:
+ if (nf_ct_l3num(ct) != AF_INET6)
+ return 0;
+ p = data + taddr->ip6Address.ip;
+ len = 16;
+ break;
+ default:
+ return 0;
+ }
+
+ memcpy(addr, p, len);
+ memset((void *)addr + len, 0, sizeof(*addr) - len);
+ memcpy(port, p + len, sizeof(__be16));
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(get_h225_addr);
+
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ TransportAddress *taddr)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ /* Read h245Address */
+ if (!get_h225_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* Create expect for h245 connection */
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = &nf_conntrack_helper_h245;
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ nathook &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nathook->nat_h245(skb, ct, ctinfo, protoff, data,
+ dataoff, taddr, port, exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_q931: expect H.245 ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
+/* If the calling party is on the same side as the forward-to party,
+ * we don't need to track the second call.
+ */
+static int callforward_do_filter(struct net *net,
+ const union nf_inet_addr *src,
+ const union nf_inet_addr *dst,
+ u_int8_t family)
+{
+ int ret = 0;
+
+ switch (family) {
+ case AF_INET: {
+ struct flowi4 fl1, fl2;
+ struct rtable *rt1, *rt2;
+
+ memset(&fl1, 0, sizeof(fl1));
+ fl1.daddr = src->ip;
+
+ memset(&fl2, 0, sizeof(fl2));
+ fl2.daddr = dst->ip;
+ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
+ flowi4_to_flowi(&fl1), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
+ flowi4_to_flowi(&fl2), false)) {
+ if (rt_nexthop(rt1, fl1.daddr) ==
+ rt_nexthop(rt2, fl2.daddr) &&
+ rt1->dst.dev == rt2->dst.dev)
+ ret = 1;
+ dst_release(&rt2->dst);
+ }
+ dst_release(&rt1->dst);
+ }
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6: {
+ struct rt6_info *rt1, *rt2;
+ struct flowi6 fl1, fl2;
+
+ memset(&fl1, 0, sizeof(fl1));
+ fl1.daddr = src->in6;
+
+ memset(&fl2, 0, sizeof(fl2));
+ fl2.daddr = dst->in6;
+ if (!nf_ip6_route(net, (struct dst_entry **)&rt1,
+ flowi6_to_flowi(&fl1), false)) {
+ if (!nf_ip6_route(net, (struct dst_entry **)&rt2,
+ flowi6_to_flowi(&fl2), false)) {
+ if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
+ rt6_nexthop(rt2, &fl2.daddr)) &&
+ rt1->dst.dev == rt2->dst.dev)
+ ret = 1;
+ dst_release(&rt2->dst);
+ }
+ dst_release(&rt1->dst);
+ }
+ break;
+ }
+#endif
+ }
+ return ret;
+
+}
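+
+/*
+ * Decision sketch (illustrative only): both endpoints are resolved to
+ * routes; if the two routes share the same next hop and the same output
+ * device, the calling and forward-to parties sit on the same side of
+ * this box, the second call leg will not pass through it, and the
+ * caller skips the expectation (a return value of 1 means "filter it").
+ */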
+
+static int expect_callforwarding(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+ struct net *net = nf_ct_net(ct);
+
+ /* Read alternativeAddress */
+ if (!get_h225_addr(ct, *data, taddr, &addr, &port) || port == 0)
+ return 0;
+
+	/* If the calling party is on the same side as the forward-to party,
+	 * we don't need to track the second call.
+	 */
+ if (callforward_filter &&
+ callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3,
+ nf_ct_l3num(ct))) {
+ pr_debug("nf_ct_q931: Call Forwarding not tracked\n");
+ return 0;
+ }
+
+ /* Create expect for the second call leg */
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = nf_conntrack_helper_q931;
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ nathook &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* Need NAT */
+ ret = nathook->nat_callforwarding(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ taddr, port, exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_q931: expect Call Forwarding ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ Setup_UUIE *setup)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+
+ pr_debug("nf_ct_q931: Setup\n");
+
+ if (setup->options & eSetup_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &setup->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
+ nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->destCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
+ pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n",
+ &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3,
+ ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
+ ret = nathook->set_h225_addr(skb, protoff, data, dataoff,
+ &setup->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ ct->tuplehash[!dir].tuple.src.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
+ nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
+ pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n",
+ &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3,
+ ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
+ ret = nathook->set_h225_addr(skb, protoff, data, dataoff,
+ &setup->sourceCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.dst.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (setup->options & eSetup_UUIE_fastStart) {
+ for (i = 0; i < setup->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &setup->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_callproceeding(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ CallProceeding_UUIE *callproc)
+{
+ int ret;
+ int i;
+
+ pr_debug("nf_ct_q931: CallProceeding\n");
+
+ if (callproc->options & eCallProceeding_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &callproc->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (callproc->options & eCallProceeding_UUIE_fastStart) {
+ for (i = 0; i < callproc->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &callproc->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ Connect_UUIE *connect)
+{
+ int ret;
+ int i;
+
+ pr_debug("nf_ct_q931: Connect\n");
+
+ if (connect->options & eConnect_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &connect->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (connect->options & eConnect_UUIE_fastStart) {
+ for (i = 0; i < connect->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &connect->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ Alerting_UUIE *alert)
+{
+ int ret;
+ int i;
+
+ pr_debug("nf_ct_q931: Alerting\n");
+
+ if (alert->options & eAlerting_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &alert->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (alert->options & eAlerting_UUIE_fastStart) {
+ for (i = 0; i < alert->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &alert->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ Facility_UUIE *facility)
+{
+ int ret;
+ int i;
+
+ pr_debug("nf_ct_q931: Facility\n");
+
+ if (facility->reason.choice == eFacilityReason_callForwarded) {
+ if (facility->options & eFacility_UUIE_alternativeAddress)
+ return expect_callforwarding(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &facility->
+ alternativeAddress);
+ return 0;
+ }
+
+ if (facility->options & eFacility_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &facility->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (facility->options & eFacility_UUIE_fastStart) {
+ for (i = 0; i < facility->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &facility->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ Progress_UUIE *progress)
+{
+ int ret;
+ int i;
+
+ pr_debug("nf_ct_q931: Progress\n");
+
+ if (progress->options & eProgress_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
+ &progress->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (progress->options & eProgress_UUIE_fastStart) {
+ for (i = 0; i < progress->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &progress->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ Q931 *q931)
+{
+ H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
+ int i;
+ int ret = 0;
+
+ switch (pdu->h323_message_body.choice) {
+ case eH323_UU_PDU_h323_message_body_setup:
+ ret = process_setup(skb, ct, ctinfo, protoff, data, dataoff,
+ &pdu->h323_message_body.setup);
+ break;
+ case eH323_UU_PDU_h323_message_body_callProceeding:
+ ret = process_callproceeding(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &pdu->h323_message_body.
+ callProceeding);
+ break;
+ case eH323_UU_PDU_h323_message_body_connect:
+ ret = process_connect(skb, ct, ctinfo, protoff, data, dataoff,
+ &pdu->h323_message_body.connect);
+ break;
+ case eH323_UU_PDU_h323_message_body_alerting:
+ ret = process_alerting(skb, ct, ctinfo, protoff, data, dataoff,
+ &pdu->h323_message_body.alerting);
+ break;
+ case eH323_UU_PDU_h323_message_body_facility:
+ ret = process_facility(skb, ct, ctinfo, protoff, data, dataoff,
+ &pdu->h323_message_body.facility);
+ break;
+ case eH323_UU_PDU_h323_message_body_progress:
+ ret = process_progress(skb, ct, ctinfo, protoff, data, dataoff,
+ &pdu->h323_message_body.progress);
+ break;
+ default:
+ pr_debug("nf_ct_q931: Q.931 signal %d\n",
+ pdu->h323_message_body.choice);
+ break;
+ }
+
+ if (ret < 0)
+ return -1;
+
+ if (pdu->options & eH323_UU_PDU_h245Control) {
+ for (i = 0; i < pdu->h245Control.count; i++) {
+ ret = process_h245(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ &pdu->h245Control.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ static Q931 q931;
+ unsigned char *data = NULL;
+ int datalen;
+ int dataoff;
+ int ret;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+ pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
+
+ spin_lock_bh(&nf_h323_lock);
+
+ /* Process each TPKT */
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
+ &data, &datalen, &dataoff)) {
+ pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
+ nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+ /* Decode Q.931 signal */
+ ret = DecodeQ931(data, datalen, &q931);
+ if (ret < 0) {
+ pr_debug("nf_ct_q931: decoding error: %s\n",
+ ret == H323_ERROR_BOUND ?
+ "out of bound" : "out of range");
+			/* Don't drop the packet on a decoding error */
+ break;
+ }
+
+ /* Process Q.931 signal */
+ if (process_q931(skb, ct, ctinfo, protoff,
+ &data, dataoff, &q931) < 0)
+ goto drop;
+ }
+
+ spin_unlock_bh(&nf_h323_lock);
+ return NF_ACCEPT;
+
+ drop:
+ spin_unlock_bh(&nf_h323_lock);
+ nf_ct_helper_log(skb, ct, "cannot process Q.931 message");
+ return NF_DROP;
+}
+
+static const struct nf_conntrack_expect_policy q931_exp_policy = {
+ /* T.120 and H.245 */
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
+ .timeout = 240,
+};
+
+static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
+ {
+ .name = "Q.931",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .help = q931_help,
+ .expect_policy = &q931_exp_policy,
+ },
+ {
+ .name = "Q.931",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .help = q931_help,
+ .expect_policy = &q931_exp_policy,
+ },
+};
+
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
+ int *datalen)
+{
+ const struct udphdr *uh;
+ struct udphdr _uh;
+ int dataoff;
+
+ uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
+ if (uh == NULL)
+ return NULL;
+ dataoff = protoff + sizeof(_uh);
+ if (dataoff >= skb->len)
+ return NULL;
+ *datalen = skb->len - dataoff;
+ if (*datalen > H323_MAX_SIZE)
+ *datalen = H323_MAX_SIZE;
+
+ return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
+}
+
+static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
+ union nf_inet_addr *addr,
+ __be16 port)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple.src.u3, 0, sizeof(tuple.src.u3));
+ tuple.src.u.tcp.port = 0;
+ memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3));
+ tuple.dst.u.tcp.port = port;
+ tuple.dst.protonum = IPPROTO_TCP;
+
+ exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
+ if (exp && exp->master == ct)
+ return exp;
+ return NULL;
+}
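+
+/*
+ * Lookup sketch (illustrative only): a TCP tuple is built with a zeroed
+ * source and the given destination address/port and matched against the
+ * registered expectations; a hit counts only if it belongs to this
+ * master conntrack.
+ */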
+
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ /* Look for the first related address */
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+ sizeof(addr)) == 0 && port != 0)
+ break;
+ }
+
+ if (i >= count) /* Not found */
+ return 0;
+
+ /* Create expect for Q.931 */
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ gkrouted_only ? /* only accept calls from GK? */
+ &ct->tuplehash[!dir].tuple.src.u3 : NULL,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = nf_conntrack_helper_q931;
+ exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) { /* Need NAT */
+ ret = nathook->nat_q931(skb, ct, ctinfo, protoff, data,
+ taddr, i, port, exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_ras: expect Q.931 ");
+ nf_ct_dump_tuple(&exp->tuple);
+
+			/* Save the port for the expectation lookup when processing RCF */
+ info->sig_port[dir] = port;
+ } else
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, GatekeeperRequest *grq)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+
+ pr_debug("nf_ct_ras: GRQ\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) /* NATed */
+ return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &grq->rasAddress, 1);
+ return 0;
+}
+
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, GatekeeperConfirm *gcf)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ pr_debug("nf_ct_ras: GCF\n");
+
+ if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port))
+ return 0;
+
+ /* Registration port is the same as discovery port */
+ if (!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
+ port == ct->tuplehash[dir].tuple.src.u.udp.port)
+ return 0;
+
+ /* Avoid RAS expectation loops. A GCF is never expected. */
+ if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+ return 0;
+
+ /* Need new expect */
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_UDP, NULL, &port);
+ exp->helper = nf_conntrack_helper_ras;
+
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_ras: expect RAS ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else
+ ret = -1;
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, RegistrationRequest *rrq)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
+ int ret;
+
+ pr_debug("nf_ct_ras: RRQ\n");
+
+ ret = expect_q931(skb, ct, ctinfo, protoff, data,
+ rrq->callSignalAddress.item,
+ rrq->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ rrq->rasAddress.item,
+ rrq->rasAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (rrq->options & eRegistrationRequest_timeToLive) {
+ pr_debug("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
+ info->timeout = rrq->timeToLive;
+ } else
+ info->timeout = default_rrq_ttl;
+
+ return 0;
+}
+
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, RegistrationConfirm *rcf)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+ struct nf_conntrack_expect *exp;
+
+ pr_debug("nf_ct_ras: RCF\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ rcf->callSignalAddress.item,
+ rcf->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (rcf->options & eRegistrationConfirm_timeToLive) {
+ pr_debug("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
+ info->timeout = rcf->timeToLive;
+ }
+
+ if (info->timeout > 0) {
+ pr_debug("nf_ct_ras: set RAS connection timeout to "
+ "%u seconds\n", info->timeout);
+ nf_ct_refresh(ct, skb, info->timeout * HZ);
+
+ /* Set expect timeout */
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
+ info->sig_port[!dir]);
+ if (exp) {
+ pr_debug("nf_ct_ras: set Q.931 expect "
+ "timeout to %u seconds for",
+ info->timeout);
+ nf_ct_dump_tuple(&exp->tuple);
+ mod_timer_pending(&exp->timeout,
+ jiffies + info->timeout * HZ);
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ }
+
+ return 0;
+}
+
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, UnregistrationRequest *urq)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+
+ pr_debug("nf_ct_ras: URQ\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ urq->callSignalAddress.item,
+ urq->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* Clear old expect */
+ nf_ct_remove_expectations(ct);
+ info->sig_port[dir] = 0;
+ info->sig_port[!dir] = 0;
+
+ /* Give it 30 seconds for UCF or URJ */
+ nf_ct_refresh(ct, skb, 30 * HZ);
+
+ return 0;
+}
+
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, AdmissionRequest *arq)
+{
+ const struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
+ int dir = CTINFO2DIR(ctinfo);
+ __be16 port;
+ union nf_inet_addr addr;
+
+ pr_debug("nf_ct_ras: ARQ\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (!nathook)
+ return 0;
+
+ if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
+ get_h225_addr(ct, *data, &arq->destCallSignalAddress,
+ &addr, &port) &&
+ !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
+ port == info->sig_port[dir] &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* Answering ARQ */
+ return nathook->set_h225_addr(skb, protoff, data, 0,
+ &arq->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
+ }
+
+ if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
+ get_h225_addr(ct, *data, &arq->srcCallSignalAddress,
+ &addr, &port) &&
+ !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ /* Calling ARQ */
+ return nathook->set_h225_addr(skb, protoff, data, 0,
+ &arq->srcCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ port);
+ }
+
+ return 0;
+}
+
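+/* ACF (AdmissionConfirm): if the granted destination signalling
+ * address matches the endpoint the ACF is addressed to (the answering
+ * case), only NAT rewriting is needed; otherwise expect the Q.931
+ * connection to the address the gatekeeper granted.
+ */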
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, AdmissionConfirm *acf)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ pr_debug("nf_ct_ras: ACF\n");
+
+ if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress,
+ &addr, &port))
+ return 0;
+
+ if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) {
+ const struct nfct_h323_nat_hooks *nathook;
+
+ /* Answering ACF */
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK)
+ return nathook->set_sig_addr(skb, ct, ctinfo, protoff,
+ data,
+ &acf->destCallSignalAddress, 1);
+ return 0;
+ }
+
+ /* Need new expect */
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_TCP, NULL, &port);
+ exp->flags = NF_CT_EXPECT_PERMANENT;
+ exp->helper = nf_conntrack_helper_q931;
+
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_ras: expect Q.931 ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else {
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ return ret;
+}
+
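+/* LRQ (LocationRequest): only the RAS reply address may need NAT
+ * rewriting.
+ */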
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, LocationRequest *lrq)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+
+ pr_debug("nf_ct_ras: LRQ\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK)
+ return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &lrq->replyAddress, 1);
+ return 0;
+}
+
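+/* LCF (LocationConfirm): the gatekeeper resolved an alias to a call
+ * signalling address; expect the Q.931 connection that will follow.
+ */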
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, LocationConfirm *lcf)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *exp;
+
+ pr_debug("nf_ct_ras: LCF\n");
+
+ if (!get_h225_addr(ct, *data, &lcf->callSignalAddress,
+ &addr, &port))
+ return 0;
+
+ /* Need new expect for call signal */
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ return -1;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_TCP, NULL, &port);
+ exp->flags = NF_CT_EXPECT_PERMANENT;
+ exp->helper = nf_conntrack_helper_q931;
+
+ if (nf_ct_expect_related(exp, 0) == 0) {
+ pr_debug("nf_ct_ras: expect Q.931 ");
+ nf_ct_dump_tuple(&exp->tuple);
+ } else {
+ ret = -1;
+ }
+
+ nf_ct_expect_put(exp);
+
+ /* Ignore rasAddress */
+
+ return ret;
+}
+
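+/* IRR (InfoRequestResponse): NAT fix-up only, for the RAS and call
+ * signalling addresses the endpoint reports about itself.
+ */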
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, InfoRequestResponse *irr)
+{
+ const struct nfct_h323_nat_hooks *nathook;
+ int ret;
+
+ pr_debug("nf_ct_ras: IRR\n");
+
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &irr->rasAddress, 1);
+ if (ret < 0)
+ return -1;
+
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ irr->callSignalAddress.item,
+ irr->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
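+/* Dispatch on the decoded message type. The request/confirm pairs of
+ * the RAS dialogue (GRQ/GCF, RRQ/RCF, ARQ/ACF, LRQ/LCF) plus URQ and
+ * IRR get dedicated handlers; anything else passes through untouched.
+ */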
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, RasMessage *ras)
+{
+ switch (ras->choice) {
+ case eRasMessage_gatekeeperRequest:
+ return process_grq(skb, ct, ctinfo, protoff, data,
+ &ras->gatekeeperRequest);
+ case eRasMessage_gatekeeperConfirm:
+ return process_gcf(skb, ct, ctinfo, protoff, data,
+ &ras->gatekeeperConfirm);
+ case eRasMessage_registrationRequest:
+ return process_rrq(skb, ct, ctinfo, protoff, data,
+ &ras->registrationRequest);
+ case eRasMessage_registrationConfirm:
+ return process_rcf(skb, ct, ctinfo, protoff, data,
+ &ras->registrationConfirm);
+ case eRasMessage_unregistrationRequest:
+ return process_urq(skb, ct, ctinfo, protoff, data,
+ &ras->unregistrationRequest);
+ case eRasMessage_admissionRequest:
+ return process_arq(skb, ct, ctinfo, protoff, data,
+ &ras->admissionRequest);
+ case eRasMessage_admissionConfirm:
+ return process_acf(skb, ct, ctinfo, protoff, data,
+ &ras->admissionConfirm);
+ case eRasMessage_locationRequest:
+ return process_lrq(skb, ct, ctinfo, protoff, data,
+ &ras->locationRequest);
+ case eRasMessage_locationConfirm:
+ return process_lcf(skb, ct, ctinfo, protoff, data,
+ &ras->locationConfirm);
+ case eRasMessage_infoRequestResponse:
+ return process_irr(skb, ct, ctinfo, protoff, data,
+ &ras->infoRequestResponse);
+ default:
+ pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
+ break;
+ }
+
+ return 0;
+}
+
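+/* Helper entry point for RAS over UDP. A datagram that fails to decode
+ * is accepted unchanged; only a message that decodes but cannot be
+ * processed (typically a NAT rewrite failure) drops the packet.
+ */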
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
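+ /* Static to keep the large decode buffer off the kernel stack; all
+ * users are serialized by nf_h323_lock below.
+ */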
+ static RasMessage ras;
+ unsigned char *data;
+ int datalen = 0;
+ int ret;
+
+ pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
+
+ spin_lock_bh(&nf_h323_lock);
+
+ /* Get UDP data */
+ data = get_udp_data(skb, protoff, &datalen);
+ if (data == NULL)
+ goto accept;
+ pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
+ nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+ /* Decode RAS message */
+ ret = DecodeRasMessage(data, datalen, &ras);
+ if (ret < 0) {
+ pr_debug("nf_ct_ras: decoding error: %s\n",
+ ret == H323_ERROR_BOUND ?
+ "out of bound" : "out of range");
+ goto accept;
+ }
+
+ /* Process RAS message */
+ if (process_ras(skb, ct, ctinfo, protoff, &data, &ras) < 0)
+ goto drop;
+
+accept:
+ spin_unlock_bh(&nf_h323_lock);
+ return NF_ACCEPT;
+
+drop:
+ spin_unlock_bh(&nf_h323_lock);
+ nf_ct_helper_log(skb, ct, "cannot process RAS message");
+ return NF_DROP;
+}
+
+static const struct nf_conntrack_expect_policy ras_exp_policy = {
+ .max_expected = 32,
+ .timeout = 240,
+};
+
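+/* One RAS helper instance per address family (IPv4 and IPv6), both
+ * bound to UDP port RAS_PORT (1719).
+ */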
+static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
+ {
+ .name = "RAS",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .help = ras_help,
+ .expect_policy = &ras_exp_policy,
+ },
+ {
+ .name = "RAS",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .help = ras_help,
+ .expect_policy = &ras_exp_policy,
+ },
+};
+
+static int __init h323_helper_init(void)
+{
+ int ret;
+
+ ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+ if (ret < 0)
+ return ret;
+ ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+ if (ret < 0)
+ goto err1;
+ ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras,
+ ARRAY_SIZE(nf_conntrack_helper_ras));
+ if (ret < 0)
+ goto err2;
+
+ return 0;
+err2:
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+err1:
+ nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+ return ret;
+}
+
+static void __exit h323_helper_exit(void)
+{
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_ras,
+ ARRAY_SIZE(nf_conntrack_helper_ras));
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+ nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+}
+
+static void __exit nf_conntrack_h323_fini(void)
+{
+ h323_helper_exit();
+ kfree(h323_buffer);
+ pr_debug("nf_ct_h323: fini\n");
+}
+
+static int __init nf_conntrack_h323_init(void)
+{
+ int ret;
+
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
+
+ h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL);
+ if (!h323_buffer)
+ return -ENOMEM;
+ ret = h323_helper_init();
+ if (ret < 0)
+ goto err1;
+ pr_debug("nf_ct_h323: init success\n");
+ return 0;
+err1:
+ kfree(h323_buffer);
+ return ret;
+}
+
+module_init(nf_conntrack_h323_init);
+module_exit(nf_conntrack_h323_fini);
+
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_h323");
+MODULE_ALIAS_NFCT_HELPER("RAS");
+MODULE_ALIAS_NFCT_HELPER("Q.931");
+MODULE_ALIAS_NFCT_HELPER("H.245");
diff --git a/net/netfilter/nf_conntrack_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c
new file mode 100644
index 000000000..fb1cb67a5
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_types.c
@@ -0,0 +1,1921 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Generated by Jing Min Zhao's ASN.1 parser, May 16 2007
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ */
+
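+/* Each table below is the flattened parse tree of one H.225.0/H.245
+ * ASN.1 type. An entry is roughly {name, type, size class, lower
+ * bound, upper bound, attributes, offset, subfields} (see struct
+ * field_t in nf_conntrack_h323_asn1.h): DECODE fields are captured
+ * into the helper's C structs, SKIP fields are parsed over, and STOP
+ * marks the point past which the decoder does not need to look.
+ */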
+static const struct field_t _TransportAddress_ipAddress[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 4, 0, DECODE,
+ offsetof(TransportAddress_ipAddress, ip), NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ipSourceRoute_route[] = { /* SEQUENCE OF */
+ {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ipSourceRoute_routing[] = { /* CHOICE */
+ {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ipSourceRoute[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _TransportAddress_ipSourceRoute_route},
+ {FNAME("routing") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _TransportAddress_ipSourceRoute_routing},
+};
+
+static const struct field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */
+ {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
+ {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("port") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE,
+ offsetof(TransportAddress_ip6Address, ip), NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H221NonStandard[] = { /* SEQUENCE */
+ {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _NonStandardIdentifier[] = { /* CHOICE */
+ {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _H221NonStandard},
+};
+
+static const struct field_t _NonStandardParameter[] = { /* SEQUENCE */
+ {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _NonStandardIdentifier},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress[] = { /* CHOICE */
+ {FNAME("ipAddress") SEQ, 0, 2, 2, DECODE,
+ offsetof(TransportAddress, ipAddress), _TransportAddress_ipAddress},
+ {FNAME("ipSourceRoute") SEQ, 0, 4, 4, SKIP | EXT, 0,
+ _TransportAddress_ipSourceRoute},
+ {FNAME("ipxAddress") SEQ, 0, 3, 3, SKIP, 0,
+ _TransportAddress_ipxAddress},
+ {FNAME("ip6Address") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(TransportAddress, ip6Address),
+ _TransportAddress_ip6Address},
+ {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0,
+ _NonStandardParameter},
+};
+
+static const struct field_t _AliasAddress[] = { /* CHOICE */
+ {FNAME("dialedDigits") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("h323-ID") BMPSTR, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("url-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("transportID") CHOICE, 3, 7, 7, SKIP | EXT, 0, NULL},
+ {FNAME("email-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("partyNumber") CHOICE, 3, 5, 5, SKIP | EXT, 0, NULL},
+ {FNAME("mobileUIM") CHOICE, 1, 2, 2, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _Setup_UUIE_sourceAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _VendorIdentifier[] = { /* SEQUENCE */
+ {FNAME("vendor") SEQ, 0, 3, 3, SKIP | EXT, 0, _H221NonStandard},
+ {FNAME("productId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("versionId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+static const struct field_t _H310Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H320Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H321Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H322Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H323Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H324Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _VoiceCaps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _T120OnlyCaps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _SupportedProtocols[] = { /* CHOICE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP, 0,
+ _NonStandardParameter},
+ {FNAME("h310") SEQ, 1, 1, 3, SKIP | EXT, 0, _H310Caps},
+ {FNAME("h320") SEQ, 1, 1, 3, SKIP | EXT, 0, _H320Caps},
+ {FNAME("h321") SEQ, 1, 1, 3, SKIP | EXT, 0, _H321Caps},
+ {FNAME("h322") SEQ, 1, 1, 3, SKIP | EXT, 0, _H322Caps},
+ {FNAME("h323") SEQ, 1, 1, 3, SKIP | EXT, 0, _H323Caps},
+ {FNAME("h324") SEQ, 1, 1, 3, SKIP | EXT, 0, _H324Caps},
+ {FNAME("voice") SEQ, 1, 1, 3, SKIP | EXT, 0, _VoiceCaps},
+ {FNAME("t120-only") SEQ, 1, 1, 3, SKIP | EXT, 0, _T120OnlyCaps},
+ {FNAME("nonStandardProtocol") SEQ, 2, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("t38FaxAnnexbOnly") SEQ, 2, 5, 5, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _GatewayInfo_protocol[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 4, 9, 11, SKIP | EXT, 0, _SupportedProtocols},
+};
+
+static const struct field_t _GatewayInfo[] = { /* SEQUENCE */
+ {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _GatewayInfo_protocol},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+static const struct field_t _McuInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _TerminalInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+static const struct field_t _EndpointType[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("vendor") SEQ, 2, 3, 3, SKIP | EXT | OPT, 0,
+ _VendorIdentifier},
+ {FNAME("gatekeeper") SEQ, 1, 1, 1, SKIP | EXT | OPT, 0,
+ _GatekeeperInfo},
+ {FNAME("gateway") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, _GatewayInfo},
+ {FNAME("mcu") SEQ, 1, 1, 2, SKIP | EXT | OPT, 0, _McuInfo},
+ {FNAME("terminal") SEQ, 1, 1, 1, SKIP | EXT | OPT, 0, _TerminalInfo},
+ {FNAME("mc") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedNode") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("set") BITSTR, FIXD, 32, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedTunnelledProtocols") SEQOF, SEMI, 0, 0, SKIP | OPT,
+ 0, NULL},
+};
+
+static const struct field_t _Setup_UUIE_destinationAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _Setup_UUIE_destExtraCallInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _Setup_UUIE_destExtraCRV[] = { /* SEQUENCE OF */
+ {FNAME("item") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _Setup_UUIE_conferenceGoal[] = { /* CHOICE */
+ {FNAME("create") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("join") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("invite") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("capability-negotiation") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callIndependentSupplementaryService") NUL, FIXD, 0, 0, SKIP,
+ 0, NULL},
+};
+
+static const struct field_t _Q954Details[] = { /* SEQUENCE */
+ {FNAME("conferenceCalling") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threePartyService") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _QseriesOptions[] = { /* SEQUENCE */
+ {FNAME("q932Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q951Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q952Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q953Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q955Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q956Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q957Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q954Info") SEQ, 0, 2, 2, SKIP | EXT, 0, _Q954Details},
+};
+
+static const struct field_t _CallType[] = { /* CHOICE */
+ {FNAME("pointToPoint") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("oneToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nToOne") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H245_NonStandardIdentifier_h221NonStandard[] = { /* SEQUENCE */
+ {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H245_NonStandardIdentifier[] = { /* CHOICE */
+ {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP, 0,
+ _H245_NonStandardIdentifier_h221NonStandard},
+};
+
+static const struct field_t _H245_NonStandardParameter[] = { /* SEQUENCE */
+ {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP, 0,
+ _H245_NonStandardIdentifier},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H261VideoCapability[] = { /* SEQUENCE */
+ {FNAME("qcifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("maxBitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("stillImageTransmission") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H262VideoCapability[] = { /* SEQUENCE */
+ {FNAME("profileAndLevel-SPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatH-14") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatHL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SNRatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SNRatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SpatialatH-14") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("profileAndLevel-HPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-HPatH-14") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-HPatHL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("samplesPerLine") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("linesPerFrame") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("framesPerSecond") INT, 4, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("luminanceSampleRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H263VideoCapability[] = { /* SEQUENCE */
+ {FNAME("sqcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("qcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cif4MPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cif16MPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("maxBitRate") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("unrestrictedVector") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("arithmeticCoding") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("advancedPrediction") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("pbFrames") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("hrd-B") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("bppMaxKb") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowSqcifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowQcifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCif4MPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCif16MPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("errorCompensation") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("enhancementLayerInfo") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("h263Options") SEQ, 5, 29, 31, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _IS11172VideoCapability[] = { /* SEQUENCE */
+ {FNAME("constrainedBitstream") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("samplesPerLine") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("linesPerFrame") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("pictureRate") INT, 4, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("luminanceSampleRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _VideoCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("h261VideoCapability") SEQ, 2, 5, 6, SKIP | EXT, 0,
+ _H261VideoCapability},
+ {FNAME("h262VideoCapability") SEQ, 6, 17, 18, SKIP | EXT, 0,
+ _H262VideoCapability},
+ {FNAME("h263VideoCapability") SEQ, 7, 13, 21, SKIP | EXT, 0,
+ _H263VideoCapability},
+ {FNAME("is11172VideoCapability") SEQ, 6, 7, 8, SKIP | EXT, 0,
+ _IS11172VideoCapability},
+ {FNAME("genericVideoCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _AudioCapability_g7231[] = { /* SEQUENCE */
+ {FNAME("maxAl-sduAudioFrames") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _IS11172AudioCapability[] = { /* SEQUENCE */
+ {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling32k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling44k1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling48k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("singleChannel") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("twoChannels") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _IS13818AudioCapability[] = { /* SEQUENCE */
+ {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling16k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling22k05") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling24k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling32k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling44k1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling48k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("singleChannel") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("twoChannels") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threeChannels2-1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threeChannels3-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels2-0-2-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels2-2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels3-1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fiveChannels3-0-2-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fiveChannels3-2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("lowFrequencyEnhancement") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("multilingual") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _AudioCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("g711Alaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Alaw56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Ulaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Ulaw56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-48k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g7231") SEQ, 0, 2, 2, SKIP, 0, _AudioCapability_g7231},
+ {FNAME("g728") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729AnnexA") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("is11172AudioCapability") SEQ, 0, 9, 9, SKIP | EXT, 0,
+ _IS11172AudioCapability},
+ {FNAME("is13818AudioCapability") SEQ, 0, 21, 21, SKIP | EXT, 0,
+ _IS13818AudioCapability},
+ {FNAME("g729wAnnexB") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729AnnexAwAnnexB") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g7231AnnexCCapability") SEQ, 1, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmFullRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmHalfRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmEnhancedFullRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("genericAudioCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+ {FNAME("g729Extensions") SEQ, 1, 8, 8, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _DataProtocolCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("v14buffered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v42lapm") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdlcFrameTunnelling") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h310SeparateVCStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h310SingleVCStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("transparent") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("segmentationAndReassembly") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdlcFrameTunnelingwSAR") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v120") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("separateLANStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v76wCompression") CHOICE, 2, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("tcp") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("udp") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _T84Profile_t84Restricted[] = { /* SEQUENCE */
+ {FNAME("qcif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("cif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("ccir601Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("ccir601Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdtvSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdtvProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g3FacsMH200x100") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g3FacsMH200x200") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g4FacsMMR200x100") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g4FacsMMR200x200") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig200x200Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig200x200Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig300x300Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig300x300Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoLow") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoMedSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoMedProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoHighSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoHighProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _T84Profile[] = { /* CHOICE */
+ {FNAME("t84Unrestricted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("t84Restricted") SEQ, 0, 19, 19, SKIP | EXT, 0,
+ _T84Profile_t84Restricted},
+};
+
+static const struct field_t _DataApplicationCapability_application_t84[] = { /* SEQUENCE */
+ {FNAME("t84Protocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t84Profile") CHOICE, 1, 2, 2, SKIP, 0, _T84Profile},
+};
+
+static const struct field_t _DataApplicationCapability_application_nlpid[] = { /* SEQUENCE */
+ {FNAME("nlpidProtocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("nlpidData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _DataApplicationCapability_application[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("t120") CHOICE, 3, 7, 14, DECODE | EXT,
+ offsetof(DataApplicationCapability_application, t120),
+ _DataProtocolCapability},
+ {FNAME("dsm-cc") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("userData") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t84") SEQ, 0, 2, 2, SKIP, 0,
+ _DataApplicationCapability_application_t84},
+ {FNAME("t434") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("h224") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("nlpid") SEQ, 0, 2, 2, SKIP, 0,
+ _DataApplicationCapability_application_nlpid},
+ {FNAME("dsvdControl") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h222DataPartitioning") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t30fax") CHOICE, 3, 7, 14, SKIP | EXT, 0, NULL},
+ {FNAME("t140") CHOICE, 3, 7, 14, SKIP | EXT, 0, NULL},
+ {FNAME("t38fax") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("genericDataCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _DataApplicationCapability[] = { /* SEQUENCE */
+ {FNAME("application") CHOICE, 4, 10, 14, DECODE | EXT,
+ offsetof(DataApplicationCapability, application),
+ _DataApplicationCapability_application},
+ {FNAME("maxBitRate") INT, CONS, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _EncryptionMode[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("h233Encryption") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _DataType[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("nullData") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoData") CHOICE, 3, 5, 6, SKIP | EXT, 0, _VideoCapability},
+ {FNAME("audioData") CHOICE, 4, 14, 22, SKIP | EXT, 0,
+ _AudioCapability},
+ {FNAME("data") SEQ, 0, 2, 2, DECODE | EXT, offsetof(DataType, data),
+ _DataApplicationCapability},
+ {FNAME("encryptionData") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _EncryptionMode},
+ {FNAME("h235Control") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("h235Media") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+ {FNAME("multiplexedStream") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _H222LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("resourceID") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("subChannelID") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("pcr-pid") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("programDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("streamDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _H223LogicalChannelParameters_adaptationLayerType_al3[] = { /* SEQUENCE */
+ {FNAME("controlFieldOctets") INT, 2, 0, 0, SKIP, 0, NULL},
+ {FNAME("sendBufferSize") INT, CONS, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H223LogicalChannelParameters_adaptationLayerType[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("al1Framed") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al1NotFramed") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al2WithoutSequenceNumbers") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al2WithSequenceNumbers") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al3") SEQ, 0, 2, 2, SKIP, 0,
+ _H223LogicalChannelParameters_adaptationLayerType_al3},
+ {FNAME("al1M") SEQ, 0, 7, 8, SKIP | EXT, 0, NULL},
+ {FNAME("al2M") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+ {FNAME("al3M") SEQ, 0, 5, 6, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _H223LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("adaptationLayerType") CHOICE, 3, 6, 9, SKIP | EXT, 0,
+ _H223LogicalChannelParameters_adaptationLayerType},
+ {FNAME("segmentableFlag") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CRCLength[] = { /* CHOICE */
+ {FNAME("crc8bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("crc16bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("crc32bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V76HDLCParameters[] = { /* SEQUENCE */
+ {FNAME("crcLength") CHOICE, 2, 3, 3, SKIP | EXT, 0, _CRCLength},
+ {FNAME("n401") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("loopbackTestProcedure") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V76LogicalChannelParameters_suspendResume[] = { /* CHOICE */
+ {FNAME("noSuspendResume") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("suspendResumewAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("suspendResumewoAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V76LogicalChannelParameters_mode_eRM_recovery[] = { /* CHOICE */
+ {FNAME("rej") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("sREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("mSREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V76LogicalChannelParameters_mode_eRM[] = { /* SEQUENCE */
+ {FNAME("windowSize") INT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("recovery") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode_eRM_recovery},
+};
+
+static const struct field_t _V76LogicalChannelParameters_mode[] = { /* CHOICE */
+ {FNAME("eRM") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode_eRM},
+ {FNAME("uNERM") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V75Parameters[] = { /* SEQUENCE */
+ {FNAME("audioHeaderPresent") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _V76LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("hdlcParameters") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _V76HDLCParameters},
+ {FNAME("suspendResume") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_suspendResume},
+ {FNAME("uIH") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("mode") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode},
+ {FNAME("v75Parameters") SEQ, 0, 1, 1, SKIP | EXT, 0, _V75Parameters},
+};
+
+static const struct field_t _H2250LogicalChannelParameters_nonStandard[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
+};
+
+static const struct field_t _UnicastAddress_iPAddress[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 4, 0, DECODE,
+ offsetof(UnicastAddress_iPAddress, network), NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _UnicastAddress_iPXAddress[] = { /* SEQUENCE */
+ {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
+ {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _UnicastAddress_iP6Address[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 16, 0, DECODE,
+ offsetof(UnicastAddress_iP6Address, network), NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _UnicastAddress_iPSourceRouteAddress_routing[] = { /* CHOICE */
+ {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _UnicastAddress_iPSourceRouteAddress_route[] = { /* SEQUENCE OF */
+ {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _UnicastAddress_iPSourceRouteAddress[] = { /* SEQUENCE */
+ {FNAME("routing") CHOICE, 1, 2, 2, SKIP, 0,
+ _UnicastAddress_iPSourceRouteAddress_routing},
+ {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _UnicastAddress_iPSourceRouteAddress_route},
+};
+
+static const struct field_t _UnicastAddress[] = { /* CHOICE */
+ {FNAME("iPAddress") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(UnicastAddress, iPAddress), _UnicastAddress_iPAddress},
+ {FNAME("iPXAddress") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _UnicastAddress_iPXAddress},
+ {FNAME("iP6Address") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(UnicastAddress, iP6Address), _UnicastAddress_iP6Address},
+ {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("iPSourceRouteAddress") SEQ, 0, 4, 4, SKIP | EXT, 0,
+ _UnicastAddress_iPSourceRouteAddress},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
+};
+
+static const struct field_t _MulticastAddress_iPAddress[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _MulticastAddress_iP6Address[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _MulticastAddress[] = { /* CHOICE */
+ {FNAME("iPAddress") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _MulticastAddress_iPAddress},
+ {FNAME("iP6Address") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _MulticastAddress_iP6Address},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
+};
+
+static const struct field_t _H245_TransportAddress[] = { /* CHOICE */
+ {FNAME("unicastAddress") CHOICE, 3, 5, 7, DECODE | EXT,
+ offsetof(H245_TransportAddress, unicastAddress), _UnicastAddress},
+ {FNAME("multicastAddress") CHOICE, 1, 2, 4, SKIP | EXT, 0,
+ _MulticastAddress},
+};
+
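+/* mediaChannel and mediaControlChannel below decode through this
+ * CHOICE; the unicast address/TSAP pairs captured there are what the
+ * helper uses to expect (and, under NAT, rewrite) the RTP and RTCP
+ * streams.
+ */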
+static const struct field_t _H2250LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _H2250LogicalChannelParameters_nonStandard},
+ {FNAME("sessionID") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("associatedSessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("mediaChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelParameters, mediaChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaGuaranteedDelivery") BOOL, FIXD, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("mediaControlChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelParameters, mediaControlChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaControlGuaranteedDelivery") BOOL, FIXD, 0, 0, STOP | OPT,
+ 0, NULL},
+ {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destination") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("dynamicRTPPayloadType") INT, 5, 96, 0, STOP | OPT, 0, NULL},
+ {FNAME("mediaPacketization") CHOICE, 0, 1, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("transportCapability") SEQ, 3, 3, 3, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("redundancyEncoding") SEQ, 1, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("source") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
+ {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
+ _H222LogicalChannelParameters},
+ {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _H223LogicalChannelParameters},
+ {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0,
+ _V76LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+ {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("dataType") CHOICE, 3, 6, 9, DECODE | EXT,
+ offsetof(OpenLogicalChannel_forwardLogicalChannelParameters,
+ dataType), _DataType},
+ {FNAME("multiplexParameters") CHOICE, 2, 3, 5, DECODE | EXT,
+ offsetof(OpenLogicalChannel_forwardLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters},
+ {FNAME("forwardLogicalChannelDependency") INT, WORD, 1, 0, SKIP | OPT,
+ 0, NULL},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
+ {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _H223LogicalChannelParameters},
+ {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0,
+ _V76LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+};
+
+static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("dataType") CHOICE, 3, 6, 9, SKIP | EXT, 0, _DataType},
+ {FNAME("multiplexParameters") CHOICE, 1, 2, 3, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannel_reverseLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters},
+ {FNAME("reverseLogicalChannelDependency") INT, WORD, 1, 0, SKIP | OPT,
+ 0, NULL},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _NetworkAccessParameters_distribution[] = { /* CHOICE */
+ {FNAME("unicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("multicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _Q2931Address_address[] = { /* CHOICE */
+ {FNAME("internationalNumber") NUMSTR, 4, 1, 0, SKIP, 0, NULL},
+ {FNAME("nsapAddress") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _Q2931Address[] = { /* SEQUENCE */
+ {FNAME("address") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _Q2931Address_address},
+ {FNAME("subaddress") OCTSTR, 5, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _NetworkAccessParameters_networkAddress[] = { /* CHOICE */
+ {FNAME("q2931Address") SEQ, 1, 2, 2, SKIP | EXT, 0, _Q2931Address},
+ {FNAME("e164Address") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("localAreaAddress") CHOICE, 1, 2, 2, DECODE | EXT,
+ offsetof(NetworkAccessParameters_networkAddress, localAreaAddress),
+ _H245_TransportAddress},
+};
+
+static const struct field_t _NetworkAccessParameters[] = { /* SEQUENCE */
+ {FNAME("distribution") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0,
+ _NetworkAccessParameters_distribution},
+ {FNAME("networkAddress") CHOICE, 2, 3, 3, DECODE | EXT,
+ offsetof(NetworkAccessParameters, networkAddress),
+ _NetworkAccessParameters_networkAddress},
+ {FNAME("associateConference") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("externalReference") OCTSTR, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("t120SetupProcedure") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+};
+
+static const struct field_t _OpenLogicalChannel[] = { /* SEQUENCE */
+ {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("forwardLogicalChannelParameters") SEQ, 1, 3, 5, DECODE | EXT,
+ offsetof(OpenLogicalChannel, forwardLogicalChannelParameters),
+ _OpenLogicalChannel_forwardLogicalChannelParameters},
+ {FNAME("reverseLogicalChannelParameters") SEQ, 1, 2, 4,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannel,
+ reverseLogicalChannelParameters),
+ _OpenLogicalChannel_reverseLogicalChannelParameters},
+ {FNAME("separateStack") SEQ, 2, 4, 5, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannel, separateStack),
+ _NetworkAccessParameters},
+ {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Setup_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel},
+};
+
+static const struct field_t _Setup_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, h245Address), _TransportAddress},
+ {FNAME("sourceAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_sourceAddress},
+ {FNAME("sourceInfo") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("destinationAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destinationAddress},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, destCallSignalAddress), _TransportAddress},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destExtraCallInfo},
+ {FNAME("destExtraCRV") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destExtraCRV},
+ {FNAME("activeMC") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("conferenceGoal") CHOICE, 2, 3, 5, SKIP | EXT, 0,
+ _Setup_UUIE_conferenceGoal},
+ {FNAME("callServices") SEQ, 0, 8, 8, SKIP | EXT | OPT, 0,
+ _QseriesOptions},
+ {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType},
+ {FNAME("sourceCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, sourceCallSignalAddress), _TransportAddress},
+ {FNAME("remoteExtensionAddress") CHOICE, 1, 2, 7, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityCapability") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Setup_UUIE, fastStart), _Setup_UUIE_fastStart},
+ {FNAME("mediaWaitForConnect") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("canOverlapSend") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("connectionParameters") SEQ, 0, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("symmetricOperationRequired") NUL, FIXD, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("neededFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("desiredFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("parallelH245Control") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("additionalSourceAddresses") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+};
+
+static const struct field_t _CallProceeding_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel},
+};
+
+static const struct field_t _CallProceeding_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(CallProceeding_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(CallProceeding_UUIE, fastStart),
+ _CallProceeding_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Connect_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel},
+};
+
+static const struct field_t _Connect_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Connect_UUIE, h245Address), _TransportAddress},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Connect_UUIE, fastStart), _Connect_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("connectedAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Alerting_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel},
+};
+
+static const struct field_t _Alerting_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Alerting_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Alerting_UUIE, fastStart), _Alerting_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("alertingAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Information_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, SKIP | OPT, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _ReleaseCompleteReason[] = { /* CHOICE */
+ {FNAME("noBandwidth") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatekeeperResources") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("unreachableDestination") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationRejection") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("invalidRevision") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("noPermission") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("unreachableGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatewayResources") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("badFormatAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("adaptiveBusy") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("inConf") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("facilityCallDeflection") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("securityDenied") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("calledPartyNotRegistered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callerNotRegistered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("newConnectionNeeded") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardReason") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("replaceWithConferenceInvite") OCTSTR, FIXD, 16, 0, SKIP, 0,
+ NULL},
+ {FNAME("genericDataReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("neededFeatureNotSupported") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("tunnelledSignallingRejected") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _ReleaseComplete_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("reason") CHOICE, 4, 12, 22, SKIP | EXT | OPT, 0,
+ _ReleaseCompleteReason},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("busyAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Facility_UUIE_alternativeAliasAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _FacilityReason[] = { /* CHOICE */
+ {FNAME("routeCallToGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callForwarded") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("routeCallToMC") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("conferenceListChoice") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("startH245") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("noH245") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("newTokens") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("featureSetUpdate") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("forwardedElements") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("transportedInformation") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
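+
+/* Reading aid (a sketch; the field order follows struct field_t from
+ * nf_conntrack_h323_asn1.h): each initializer is
+ *
+ *	{FNAME("name") type, sz, lb, ub, attr, offset, subfields}
+ *
+ * For a CHOICE, "sz" is the number of bits encoding the alternative index,
+ * "lb" the number of base alternatives and "ub" the total including
+ * extensions: _FacilityReason above is referenced below as
+ * {... CHOICE, 2, 4, 11, ...}, i.e. 11 alternatives, 4 in the base set,
+ * selected by a 2-bit index.  For a SEQUENCE, "sz" counts the OPT/DEFAULT
+ * bits in the preamble, "lb" the base fields and "ub" all fields.
+ */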
+
+static const struct field_t _Facility_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+static const struct field_t _Facility_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("alternativeAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Facility_UUIE, alternativeAddress), _TransportAddress},
+ {FNAME("alternativeAliasAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Facility_UUIE_alternativeAliasAddress},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL},
+ {FNAME("reason") CHOICE, 2, 4, 11, DECODE | EXT,
+ offsetof(Facility_UUIE, reason), _FacilityReason},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") CHOICE, 1, 2, 7, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("conferences") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Facility_UUIE, h245Address), _TransportAddress},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Facility_UUIE, fastStart), _Facility_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+};
+
+static const struct field_t _CallIdentifier[] = { /* SEQUENCE */
+ {FNAME("guid") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _SecurityServiceMode[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
+ {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("default") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _SecurityCapabilities[] = { /* SEQUENCE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("encryption") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+ {FNAME("authenticaton") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+ {FNAME("integrity") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+};
+
+static const struct field_t _H245Security[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
+ {FNAME("noSecurity") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("tls") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
+ {FNAME("ipsec") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
+};
+
+static const struct field_t _DHset[] = { /* SEQUENCE */
+ {FNAME("halfkey") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("modSize") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("generator") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TypedCertificate[] = { /* SEQUENCE */
+ {FNAME("type") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("certificate") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H235_NonStandardParameter[] = { /* SEQUENCE */
+ {FNAME("nonStandardIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _ClearToken[] = { /* SEQUENCE */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("password") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("dhkey") SEQ, 0, 3, 3, SKIP | EXT | OPT, 0, _DHset},
+ {FNAME("challenge") OCTSTR, 7, 8, 0, SKIP | OPT, 0, NULL},
+ {FNAME("random") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("certificate") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0,
+ _TypedCertificate},
+ {FNAME("generalID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _H235_NonStandardParameter},
+ {FNAME("eckasdhkey") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("sendersID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _Progress_UUIE_tokens[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
+};
+
+static const struct field_t _Params[] = { /* SEQUENCE */
+ {FNAME("ranInt") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("iv8") OCTSTR, FIXD, 8, 0, SKIP | OPT, 0, NULL},
+ {FNAME("iv16") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdHash_token[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdHash[] = { /* SEQUENCE */
+ {FNAME("alias") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdHash_token},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdHash_token[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdHash[] = { /* SEQUENCE */
+ {FNAME("gatekeeperId") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdHash_token},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdEncr[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdEncr[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPCert[] = { /* SEQUENCE */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKCert[] = { /* SEQUENCE */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoFastStart[] = { /* SEQUENCE */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoEncryptedToken_token[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoEncryptedToken[] = { /* SEQUENCE */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoEncryptedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoSignedToken_token[] = { /* SEQUENCE */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoSignedToken[] = { /* SEQUENCE */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoToken_cryptoSignedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoHashedToken_token[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoHashedToken[] = { /* SEQUENCE */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("hashedVals") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoHashedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoPwdEncr[] = { /* SEQUENCE */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken[] = { /* CHOICE */
+ {FNAME("cryptoEncryptedToken") SEQ, 0, 2, 2, SKIP, 0,
+ _CryptoToken_cryptoEncryptedToken},
+ {FNAME("cryptoSignedToken") SEQ, 0, 2, 2, SKIP, 0,
+ _CryptoToken_cryptoSignedToken},
+ {FNAME("cryptoHashedToken") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoHashedToken},
+ {FNAME("cryptoPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoPwdEncr},
+};
+
+static const struct field_t _CryptoH323Token[] = { /* CHOICE */
+ {FNAME("cryptoEPPwdHash") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdHash},
+ {FNAME("cryptoGKPwdHash") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdHash},
+ {FNAME("cryptoEPPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdEncr},
+ {FNAME("cryptoGKPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdEncr},
+ {FNAME("cryptoEPCert") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoEPCert},
+ {FNAME("cryptoGKCert") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoGKCert},
+ {FNAME("cryptoFastStart") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoFastStart},
+ {FNAME("nestedcryptoToken") CHOICE, 2, 4, 4, SKIP | EXT, 0,
+ _CryptoToken},
+};
+
+static const struct field_t _Progress_UUIE_cryptoTokens[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 8, 8, SKIP | EXT, 0, _CryptoH323Token},
+};
+
+static const struct field_t _Progress_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+static const struct field_t _Progress_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Progress_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0,
+ _CallIdentifier},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ _H245Security},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Progress_UUIE_tokens},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Progress_UUIE_cryptoTokens},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Progress_UUIE, fastStart), _Progress_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE */
+ {FNAME("setup") SEQ, 7, 13, 39, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, setup), _Setup_UUIE},
+ {FNAME("callProceeding") SEQ, 1, 3, 12, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, callProceeding),
+ _CallProceeding_UUIE},
+ {FNAME("connect") SEQ, 1, 4, 19, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, connect), _Connect_UUIE},
+ {FNAME("alerting") SEQ, 1, 3, 17, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, alerting), _Alerting_UUIE},
+ {FNAME("information") SEQ, 0, 1, 7, SKIP | EXT, 0, _Information_UUIE},
+ {FNAME("releaseComplete") SEQ, 1, 2, 11, SKIP | EXT, 0,
+ _ReleaseComplete_UUIE},
+ {FNAME("facility") SEQ, 3, 5, 21, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, facility), _Facility_UUIE},
+ {FNAME("progress") SEQ, 5, 8, 11, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, progress), _Progress_UUIE},
+ {FNAME("empty") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("status") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("statusInquiry") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("setupAcknowledge") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("notify") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _RequestMessage[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("masterSlaveDetermination") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("terminalCapabilitySet") SEQ, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("openLogicalChannel") SEQ, 1, 3, 5, DECODE | EXT,
+ offsetof(RequestMessage, openLogicalChannel), _OpenLogicalChannel},
+ {FNAME("closeLogicalChannel") SEQ, 0, 2, 3, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelClose") SEQ, 0, 1, 3, STOP | EXT, 0, NULL},
+ {FNAME("multiplexEntrySend") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntry") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestMode") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("roundTripDelayRequest") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopRequest") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("communicationModeRequest") SEQ, 0, 0, 0, STOP | EXT, 0, NULL},
+ {FNAME("conferenceRequest") CHOICE, 3, 8, 16, STOP | EXT, 0, NULL},
+ {FNAME("multilinkRequest") CHOICE, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("logicalChannelRateRequest") SEQ, 0, 3, 3, STOP | EXT, 0,
+ NULL},
+};
+
+static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
+ {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
+ _H222LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+};
+
+static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("reverseLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("multiplexParameters") CHOICE, 0, 1, 2, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannelAck_reverseLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _H2250LogicalChannelAckParameters_nonStandard[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
+};
+
+static const struct field_t _H2250LogicalChannelAckParameters[] = { /* SEQUENCE */
+ {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _H2250LogicalChannelAckParameters_nonStandard},
+ {FNAME("sessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("mediaChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelAckParameters, mediaChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaControlChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelAckParameters, mediaControlChannel),
+ _H245_TransportAddress},
+ {FNAME("dynamicRTPPayloadType") INT, 5, 96, 0, SKIP | OPT, 0, NULL},
+ {FNAME("flowControlToZero") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _OpenLogicalChannelAck_forwardMultiplexAckParameters[] = { /* CHOICE */
+ {FNAME("h2250LogicalChannelAckParameters") SEQ, 5, 5, 7, DECODE | EXT,
+ offsetof(OpenLogicalChannelAck_forwardMultiplexAckParameters,
+ h2250LogicalChannelAckParameters),
+ _H2250LogicalChannelAckParameters},
+};
+
+static const struct field_t _OpenLogicalChannelAck[] = { /* SEQUENCE */
+ {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("reverseLogicalChannelParameters") SEQ, 2, 3, 4,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck,
+ reverseLogicalChannelParameters),
+ _OpenLogicalChannelAck_reverseLogicalChannelParameters},
+ {FNAME("separateStack") SEQ, 2, 4, 5, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannelAck, separateStack),
+ _NetworkAccessParameters},
+ {FNAME("forwardMultiplexAckParameters") CHOICE, 0, 1, 1,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck,
+ forwardMultiplexAckParameters),
+ _OpenLogicalChannelAck_forwardMultiplexAckParameters},
+ {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _ResponseMessage[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("masterSlaveDeterminationAck") SEQ, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("masterSlaveDeterminationReject") SEQ, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("terminalCapabilitySetAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("terminalCapabilitySetReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("openLogicalChannelAck") SEQ, 1, 2, 5, DECODE | EXT,
+ offsetof(ResponseMessage, openLogicalChannelAck),
+ _OpenLogicalChannelAck},
+ {FNAME("openLogicalChannelReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("closeLogicalChannelAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelCloseAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelCloseReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("multiplexEntrySendAck") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("multiplexEntrySendReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntryAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntryReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("requestModeAck") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestModeReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("roundTripDelayResponse") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("communicationModeResponse") CHOICE, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("conferenceResponse") CHOICE, 3, 8, 16, STOP | EXT, 0, NULL},
+ {FNAME("multilinkResponse") CHOICE, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("logicalChannelRateAcknowledge") SEQ, 0, 3, 3, STOP | EXT, 0,
+ NULL},
+ {FNAME("logicalChannelRateReject") SEQ, 1, 4, 4, STOP | EXT, 0, NULL},
+};
+
+static const struct field_t _MultimediaSystemControlMessage[] = { /* CHOICE */
+ {FNAME("request") CHOICE, 4, 11, 15, DECODE | EXT,
+ offsetof(MultimediaSystemControlMessage, request), _RequestMessage},
+ {FNAME("response") CHOICE, 5, 19, 24, DECODE | EXT,
+ offsetof(MultimediaSystemControlMessage, response),
+ _ResponseMessage},
+ {FNAME("command") CHOICE, 3, 7, 12, STOP | EXT, 0, NULL},
+ {FNAME("indication") CHOICE, 4, 14, 23, STOP | EXT, 0, NULL},
+};
+
+static const struct field_t _H323_UU_PDU_h245Control[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 2, 4, 4, DECODE | OPEN | EXT,
+ sizeof(MultimediaSystemControlMessage),
+ _MultimediaSystemControlMessage}
+ ,
+};
+
+static const struct field_t _H323_UU_PDU[] = { /* SEQUENCE */
+ {FNAME("h323-message-body") CHOICE, 3, 7, 13, DECODE | EXT,
+ offsetof(H323_UU_PDU, h323_message_body),
+ _H323_UU_PDU_h323_message_body},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("h4501SupplementaryService") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("h245Tunneling") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Control") SEQOF, SEMI, 0, 4, DECODE | OPT,
+ offsetof(H323_UU_PDU, h245Control), _H323_UU_PDU_h245Control},
+ {FNAME("nonStandardControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("callLinkage") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("tunnelledSignallingMessage") SEQ, 2, 4, 4, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("provisionalRespToH245Tunneling") NUL, FIXD, 0, 0, STOP | OPT,
+ 0, NULL},
+ {FNAME("stimulusControl") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _H323_UserInformation[] = { /* SEQUENCE */
+ {FNAME("h323-uu-pdu") SEQ, 1, 2, 11, DECODE | EXT,
+ offsetof(H323_UserInformation, h323_uu_pdu), _H323_UU_PDU},
+ {FNAME("user-data") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(GatekeeperRequest, rasAddress), _TransportAddress},
+ {FNAME("endpointType") SEQ, 6, 8, 10, STOP | EXT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("callServices") SEQ, 0, 8, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("authenticationCapability") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("algorithmOIDs") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrity") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("supportsAltGK") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(GatekeeperConfirm, rasAddress), _TransportAddress},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("authenticationMode") CHOICE, 3, 7, 8, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrity") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _RegistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _RegistrationRequest_rasAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _RegistrationRequest_terminalAlias[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _RegistrationRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("discoveryComplete") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationRequest, callSignalAddress),
+ _RegistrationRequest_callSignalAddress},
+ {FNAME("rasAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationRequest, rasAddress),
+ _RegistrationRequest_rasAddress},
+ {FNAME("terminalType") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("terminalAlias") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _RegistrationRequest_terminalAlias},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("endpointVendor") SEQ, 2, 3, 3, SKIP | EXT, 0,
+ _VendorIdentifier},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("timeToLive") INT, CONS, 1, 0, DECODE | OPT,
+ offsetof(RegistrationRequest, timeToLive), NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("keepAlive") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("willSupplyUUIEs") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("additiveRegistration") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("terminalAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportsAltGK") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("usageReportingCapability") SEQ, 3, 4, 4, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportedH248Packages") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("callCreditCapability") SEQ, 2, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacityReportingCapability") SEQ, 0, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _RegistrationConfirm_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _RegistrationConfirm_terminalAlias[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _RegistrationConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationConfirm, callSignalAddress),
+ _RegistrationConfirm_callSignalAddress},
+ {FNAME("terminalAlias") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _RegistrationConfirm_terminalAlias},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("timeToLive") INT, CONS, 1, 0, DECODE | OPT,
+ offsetof(RegistrationConfirm, timeToLive), NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("willRespondToIRR") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("preGrantedARQ") SEQ, 0, 4, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportsAdditiveRegistration") NUL, FIXD, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("terminalAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("usageSpec") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureServerAlias") CHOICE, 1, 2, 7, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacityReportingSpec") SEQ, 0, 1, 1, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _UnregistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _UnregistrationRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(UnregistrationRequest, callSignalAddress),
+ _UnregistrationRequest_callSignalAddress},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("reason") CHOICE, 2, 4, 5, STOP | EXT | OPT, 0, NULL},
+ {FNAME("endpointAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _CallModel[] = { /* CHOICE */
+ {FNAME("direct") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatekeeperRouted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _AdmissionRequest_destinationInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _AdmissionRequest_destExtraCallInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _AdmissionRequest_srcInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _AdmissionRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType},
+ {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, _CallModel},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _AdmissionRequest_destinationInfo},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(AdmissionRequest, destCallSignalAddress),
+ _TransportAddress},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _AdmissionRequest_destExtraCallInfo},
+ {FNAME("srcInfo") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _AdmissionRequest_srcInfo},
+ {FNAME("srcCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(AdmissionRequest, srcCallSignalAddress), _TransportAddress},
+ {FNAME("bandWidth") INT, CONS, 0, 0, STOP, 0, NULL},
+ {FNAME("callReferenceValue") INT, WORD, 0, 0, STOP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("callServices") SEQ, 0, 8, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, STOP, 0, NULL},
+ {FNAME("activeMC") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("answerCall") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("canMapAlias") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("srcAlternatives") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destAlternatives") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("transportQOS") CHOICE, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("willSupplyUUIEs") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("callLinkage") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("gatewayDataRate") SEQ, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("desiredTunnelledProtocol") SEQ, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _AdmissionConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("bandWidth") INT, CONS, 0, 0, SKIP, 0, NULL},
+ {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT, 0, _CallModel},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(AdmissionConfirm, destCallSignalAddress),
+ _TransportAddress},
+ {FNAME("irrFrequency") INT, WORD, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destinationType") SEQ, 6, 8, 10, STOP | EXT | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("transportQOS") CHOICE, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("willRespondToIRR") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("uuiesRequested") SEQ, 0, 9, 13, STOP | EXT, 0, NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("useSpecifiedTransport") CHOICE, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("usageSpec") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportedProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _LocationRequest_destinationInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _LocationRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _LocationRequest_destinationInfo},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("replyAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationRequest, replyAddress), _TransportAddress},
+ {FNAME("sourceInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("canMapAlias") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("desiredTunnelledProtocol") SEQ, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("hopCount") INT, 8, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _LocationConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationConfirm, callSignalAddress), _TransportAddress},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationConfirm, rasAddress), _TransportAddress},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destinationType") SEQ, 6, 8, 10, STOP | EXT | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("supportedProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _InfoRequestResponse_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _InfoRequestResponse[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("endpointType") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(InfoRequestResponse, rasAddress), _TransportAddress},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(InfoRequestResponse, callSignalAddress),
+ _InfoRequestResponse_callSignalAddress},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("perCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("needResponse") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("irrStatus") CHOICE, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("unsolicited") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _RasMessage[] = { /* CHOICE */
+ {FNAME("gatekeeperRequest") SEQ, 4, 8, 18, DECODE | EXT,
+ offsetof(RasMessage, gatekeeperRequest), _GatekeeperRequest},
+ {FNAME("gatekeeperConfirm") SEQ, 2, 5, 14, DECODE | EXT,
+ offsetof(RasMessage, gatekeeperConfirm), _GatekeeperConfirm},
+ {FNAME("gatekeeperReject") SEQ, 2, 5, 11, STOP | EXT, 0, NULL},
+ {FNAME("registrationRequest") SEQ, 3, 10, 31, DECODE | EXT,
+ offsetof(RasMessage, registrationRequest), _RegistrationRequest},
+ {FNAME("registrationConfirm") SEQ, 3, 7, 24, DECODE | EXT,
+ offsetof(RasMessage, registrationConfirm), _RegistrationConfirm},
+ {FNAME("registrationReject") SEQ, 2, 5, 11, STOP | EXT, 0, NULL},
+ {FNAME("unregistrationRequest") SEQ, 3, 5, 15, DECODE | EXT,
+ offsetof(RasMessage, unregistrationRequest), _UnregistrationRequest},
+ {FNAME("unregistrationConfirm") SEQ, 1, 2, 6, STOP | EXT, 0, NULL},
+ {FNAME("unregistrationReject") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("admissionRequest") SEQ, 7, 16, 34, DECODE | EXT,
+ offsetof(RasMessage, admissionRequest), _AdmissionRequest},
+ {FNAME("admissionConfirm") SEQ, 2, 6, 27, DECODE | EXT,
+ offsetof(RasMessage, admissionConfirm), _AdmissionConfirm},
+ {FNAME("admissionReject") SEQ, 1, 3, 11, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthRequest") SEQ, 2, 7, 18, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthConfirm") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthReject") SEQ, 1, 4, 9, STOP | EXT, 0, NULL},
+ {FNAME("disengageRequest") SEQ, 1, 6, 19, STOP | EXT, 0, NULL},
+ {FNAME("disengageConfirm") SEQ, 1, 2, 9, STOP | EXT, 0, NULL},
+ {FNAME("disengageReject") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("locationRequest") SEQ, 2, 5, 17, DECODE | EXT,
+ offsetof(RasMessage, locationRequest), _LocationRequest},
+ {FNAME("locationConfirm") SEQ, 1, 4, 19, DECODE | EXT,
+ offsetof(RasMessage, locationConfirm), _LocationConfirm},
+ {FNAME("locationReject") SEQ, 1, 3, 10, STOP | EXT, 0, NULL},
+ {FNAME("infoRequest") SEQ, 2, 4, 15, STOP | EXT, 0, NULL},
+ {FNAME("infoRequestResponse") SEQ, 3, 8, 16, DECODE | EXT,
+ offsetof(RasMessage, infoRequestResponse), _InfoRequestResponse},
+ {FNAME("nonStandardMessage") SEQ, 0, 2, 7, STOP | EXT, 0, NULL},
+ {FNAME("unknownMessageResponse") SEQ, 0, 1, 5, STOP | EXT, 0, NULL},
+ {FNAME("requestInProgress") SEQ, 4, 6, 6, STOP | EXT, 0, NULL},
+ {FNAME("resourcesAvailableIndicate") SEQ, 4, 9, 11, STOP | EXT, 0,
+ NULL},
+ {FNAME("resourcesAvailableConfirm") SEQ, 4, 6, 7, STOP | EXT, 0,
+ NULL},
+ {FNAME("infoRequestAck") SEQ, 4, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("infoRequestNak") SEQ, 5, 7, 7, STOP | EXT, 0, NULL},
+ {FNAME("serviceControlIndication") SEQ, 8, 10, 10, STOP | EXT, 0,
+ NULL},
+ {FNAME("serviceControlResponse") SEQ, 7, 8, 8, STOP | EXT, 0, NULL},
+};
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
new file mode 100644
index 000000000..bf09a1e06
--- /dev/null
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Helper handling for netfilter. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/random.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_log.h>
+
+static DEFINE_MUTEX(nf_ct_helper_mutex);
+struct hlist_head *nf_ct_helper_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_helper_hash);
+unsigned int nf_ct_helper_hsize __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
+static unsigned int nf_ct_helper_count __read_mostly;
+
+static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
+static struct list_head nf_ct_nat_helpers __read_mostly;
+
+/* Stupid hash, but collision free for the default registrations of the
+ * helpers currently in the kernel. */
+static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
+{
+ return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^
+ (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
+}
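+
+/* Worked example (a sketch): for a TCP/IPv4 helper bound to source port 21
+ * this computes
+ *
+ *	(((AF_INET << 8) | IPPROTO_TCP) ^ htons(21)) % nf_ct_helper_hsize
+ *
+ * i.e. l3num and protonum are packed into one value, XORed with the source
+ * port (network byte order) and reduced modulo the table size.
+ */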
+
+struct nf_conntrack_helper *
+__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
+{
+ struct nf_conntrack_helper *h;
+ unsigned int i;
+
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
+ if (strcmp(h->name, name))
+ continue;
+
+ if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+ h->tuple.src.l3num != l3num)
+ continue;
+
+ if (h->tuple.dst.protonum == protonum)
+ return h;
+ }
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find);
+
+struct nf_conntrack_helper *
+nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+ struct nf_conntrack_helper *h;
+
+ rcu_read_lock();
+
+ h = __nf_conntrack_helper_find(name, l3num, protonum);
+#ifdef CONFIG_MODULES
+ if (h == NULL) {
+ rcu_read_unlock();
+ if (request_module("nfct-helper-%s", name) == 0) {
+ rcu_read_lock();
+ h = __nf_conntrack_helper_find(name, l3num, protonum);
+ } else {
+ return h;
+ }
+ }
+#endif
+ if (h != NULL && !try_module_get(h->me))
+ h = NULL;
+ if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) {
+ module_put(h->me);
+ h = NULL;
+ }
+
+ rcu_read_unlock();
+
+ return h;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
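+
+/* The "nfct-helper-%s" request above matches the MODULE_ALIAS_NFCT_HELPER()
+ * alias that helper modules declare (see the IRC helper below for an
+ * example), so looking up the "irc" helper can autoload nf_conntrack_irc.
+ */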
+
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
+{
+ refcount_dec(&helper->refcnt);
+ module_put(helper->me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
+
+static struct nf_conntrack_nat_helper *
+nf_conntrack_nat_helper_find(const char *mod_name)
+{
+ struct nf_conntrack_nat_helper *cur;
+ bool found = false;
+
+ list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) {
+ if (!strcmp(cur->mod_name, mod_name)) {
+ found = true;
+ break;
+ }
+ }
+ return found ? cur : NULL;
+}
+
+int
+nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+ struct nf_conntrack_helper *h;
+ struct nf_conntrack_nat_helper *nat;
+ char mod_name[NF_CT_HELPER_NAME_LEN];
+ int ret = 0;
+
+ rcu_read_lock();
+ h = __nf_conntrack_helper_find(name, l3num, protonum);
+ if (!h) {
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+
+ nat = nf_conntrack_nat_helper_find(h->nat_mod_name);
+ if (!nat) {
+ snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name);
+ rcu_read_unlock();
+ request_module("%s", mod_name);
+
+ rcu_read_lock();
+ nat = nf_conntrack_nat_helper_find(mod_name);
+ if (!nat) {
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+ }
+
+ if (!try_module_get(nat->module))
+ ret = -ENOENT;
+
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get);
+
+void nf_nat_helper_put(struct nf_conntrack_helper *helper)
+{
+ struct nf_conntrack_nat_helper *nat;
+
+ nat = nf_conntrack_nat_helper_find(helper->nat_mod_name);
+ if (WARN_ON_ONCE(!nat))
+ return;
+
+ module_put(nat->module);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_put);
+
+struct nf_conn_help *
+nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+ struct nf_conn_help *help;
+
+ help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp);
+ if (help)
+ INIT_HLIST_HEAD(&help->expectations);
+ else
+ pr_debug("failed to add helper extension area");
+ return help;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+
+int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
+ gfp_t flags)
+{
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conn_help *help;
+
+	/* A helper has already been explicitly attached. The function
+	 * nf_conntrack_alter_reply - called when NAT is in use - would look
+	 * the helper up again. Since the user is now in full control of
+	 * keeping the helper configuration consistent, skip this automatic
+	 * re-lookup; otherwise we would lose the helper.
+	 */
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return 0;
+
+ if (WARN_ON_ONCE(!tmpl))
+ return 0;
+
+ help = nfct_help(tmpl);
+ if (help != NULL) {
+ helper = rcu_dereference(help->helper);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+
+ help = nfct_help(ct);
+
+ if (helper == NULL) {
+ if (help)
+ RCU_INIT_POINTER(help->helper, NULL);
+ return 0;
+ }
+
+ if (help == NULL) {
+ help = nf_ct_helper_ext_add(ct, flags);
+ if (help == NULL)
+ return -ENOMEM;
+ } else {
+ /* We only allow helper re-assignment of the same sort since
+ * we cannot reallocate the helper extension area.
+ */
+ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper);
+
+ if (tmp && tmp->help != helper->help) {
+ RCU_INIT_POINTER(help->helper, NULL);
+ return 0;
+ }
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
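+
+/* Flow sketch: a conntrack template (e.g. one created via the CT target)
+ * carries the chosen helper; when a conntrack is created from it, the
+ * helper pointer is copied over and IPS_HELPER_BIT records that the
+ * assignment was explicit, so the NAT-triggered re-lookup above stays
+ * skipped for the lifetime of the connection.
+ */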
+
+/* The appropriate ct lock must be held by the caller. */
+static int unhelp(struct nf_conn *ct, void *me)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+
+ if (help && rcu_dereference_raw(help->helper) == me) {
+ nf_conntrack_event(IPCT_HELPER, ct);
+ RCU_INIT_POINTER(help->helper, NULL);
+ }
+
+	/* We do not intend to delete this conntrack. */
+ return 0;
+}
+
+void nf_ct_helper_destroy(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+
+ if (help) {
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (helper && helper->destroy)
+ helper->destroy(ct);
+ rcu_read_unlock();
+ }
+}
+
+static LIST_HEAD(nf_ct_helper_expectfn_list);
+
+void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
+{
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
+
+void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
+{
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ list_del_rcu(&n->head);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
+
+/* Caller should hold the rcu lock */
+struct nf_ct_helper_expectfn *
+nf_ct_helper_expectfn_find_by_name(const char *name)
+{
+ struct nf_ct_helper_expectfn *cur;
+ bool found = false;
+
+ list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
+ if (!strcmp(cur->name, name)) {
+ found = true;
+ break;
+ }
+ }
+ return found ? cur : NULL;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name);
+
+/* Caller should hold the rcu lock */
+struct nf_ct_helper_expectfn *
+nf_ct_helper_expectfn_find_by_symbol(const void *symbol)
+{
+ struct nf_ct_helper_expectfn *cur;
+ bool found = false;
+
+ list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
+ if (cur->expectfn == symbol) {
+ found = true;
+ break;
+ }
+ }
+ return found ? cur : NULL;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol);
+
+__printf(3, 4)
+void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
+ const char *fmt, ...)
+{
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+	/* Called from a helper function, so the help extension exists and
+	 * this call never fails.
+	 */
+ help = nfct_help(ct);
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+
+ nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
+ "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
+
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_log);
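+
+/* Usage sketch (the message text is illustrative): a helper that has to
+ * give up on a packet typically does
+ *
+ *	nf_ct_helper_log(skb, ct, "cannot parse command");
+ *	return NF_DROP;
+ *
+ * and the message is emitted through nf_log_packet(), prefixed with the
+ * helper's name.
+ */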
+
+int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
+{
+ struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
+ unsigned int h = helper_hash(&me->tuple);
+ struct nf_conntrack_helper *cur;
+ int ret = 0, i;
+
+ BUG_ON(me->expect_policy == NULL);
+ BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
+ BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
+ if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
+ mutex_lock(&nf_ct_helper_mutex);
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+ if (!strcmp(cur->name, me->name) &&
+ (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+ cur->tuple.src.l3num == me->tuple.src.l3num) &&
+ cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+
+ /* avoid unpredictable behaviour for auto_assign_helper */
+ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+ if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+ &mask)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+ refcount_set(&me->refcnt, 1);
+ hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
+ nf_ct_helper_count++;
+out:
+ mutex_unlock(&nf_ct_helper_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
+
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
+{
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ mutex_lock(&nf_ct_helper_mutex);
+ hlist_del_rcu(&me->hnode);
+ nf_ct_helper_count--;
+ mutex_unlock(&nf_ct_helper_mutex);
+
+	/* Make sure nothing is still using the helper unless it is a
+	 * connection in the hash table.
+	 */
+ synchronize_rcu();
+
+ nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
+ nf_ct_iterate_destroy(unhelp, me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
+
+void nf_ct_helper_init(struct nf_conntrack_helper *helper,
+ u16 l3num, u16 protonum, const char *name,
+ u16 default_port, u16 spec_port, u32 id,
+ const struct nf_conntrack_expect_policy *exp_pol,
+ u32 expect_class_max,
+ int (*help)(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo),
+ int (*from_nlattr)(struct nlattr *attr,
+ struct nf_conn *ct),
+ struct module *module)
+{
+ helper->tuple.src.l3num = l3num;
+ helper->tuple.dst.protonum = protonum;
+ helper->tuple.src.u.all = htons(spec_port);
+ helper->expect_policy = exp_pol;
+ helper->expect_class_max = expect_class_max;
+ helper->help = help;
+ helper->from_nlattr = from_nlattr;
+ helper->me = module;
+ snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name),
+ NF_NAT_HELPER_PREFIX "%s", name);
+
+ if (spec_port == default_port)
+ snprintf(helper->name, sizeof(helper->name), "%s", name);
+ else
+ snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id);
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_init);
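+
+/* Registration sketch (illustrative only: identifiers prefixed my_ or
+ * MYPROTO_ are hypothetical; the pattern mirrors in-tree helpers such as
+ * the IRC helper below):
+ *
+ *	static struct nf_conntrack_helper my_helpers[2];
+ *	static const struct nf_conntrack_expect_policy my_policy = {
+ *		.max_expected	= 1,
+ *		.timeout	= 5 * 60,
+ *	};
+ *
+ *	for (i = 0; i < 2; i++)
+ *		nf_ct_helper_init(&my_helpers[i], AF_INET, IPPROTO_TCP,
+ *				  "myproto", MYPROTO_PORT, ports[i], i,
+ *				  &my_policy, 0, myproto_help, NULL,
+ *				  THIS_MODULE);
+ *	err = nf_conntrack_helpers_register(my_helpers, 2);
+ *
+ * A helper bound to a port other than the default is named "myproto-<id>"
+ * by the snprintf() above.
+ */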
+
+int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper,
+ unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = nf_conntrack_helper_register(&helper[i]);
+ if (err < 0)
+ goto err;
+ }
+
+ return err;
+err:
+ if (i > 0)
+ nf_conntrack_helpers_unregister(helper, i);
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register);
+
+void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper,
+ unsigned int n)
+{
+ while (n-- > 0)
+ nf_conntrack_helper_unregister(&helper[n]);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
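Taken together, nf_ct_helper_init() and the nf_conntrack_helpers_register()/_unregister() pair give protocol helpers a compact registration idiom; the in-tree IRC helper below follows it. A minimal sketch of a hypothetical single-port helper module — MY_PORT, my_helper, my_help and the "myproto" name are illustrative, not part of this API:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>

#define MY_PORT 12345	/* hypothetical protocol port */

static struct nf_conntrack_helper my_helper __read_mostly;
static const struct nf_conntrack_expect_policy my_exp_policy = {
	.max_expected	= 1,
	.timeout	= 30,	/* seconds */
};

static int my_help(struct sk_buff *skb, unsigned int protoff,
		   struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
	/* a real helper would parse the payload and set up expectations */
	return NF_ACCEPT;
}

static int __init my_init(void)
{
	/* spec_port == default_port, so the helper is named plain "myproto" */
	nf_ct_helper_init(&my_helper, AF_INET, IPPROTO_TCP, "myproto",
			  MY_PORT, MY_PORT, 0, &my_exp_policy, 0,
			  my_help, NULL, THIS_MODULE);
	return nf_conntrack_helpers_register(&my_helper, 1);
}

static void __exit my_exit(void)
{
	nf_conntrack_helpers_unregister(&my_helper, 1);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");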
+
+void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat)
+{
+ mutex_lock(&nf_ct_nat_helpers_mutex);
+ list_add_rcu(&nat->list, &nf_ct_nat_helpers);
+ mutex_unlock(&nf_ct_nat_helpers_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_register);
+
+void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat)
+{
+ mutex_lock(&nf_ct_nat_helpers_mutex);
+ list_del_rcu(&nat->list);
+ mutex_unlock(&nf_ct_nat_helpers_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_helper_unregister);
+
+int nf_conntrack_helper_init(void)
+{
+ nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
+ nf_ct_helper_hash =
+ nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
+ if (!nf_ct_helper_hash)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&nf_ct_nat_helpers);
+ return 0;
+}
+
+void nf_conntrack_helper_fini(void)
+{
+ kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
+}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
new file mode 100644
index 000000000..5703846be
--- /dev/null
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* IRC extension for IP connection tracking, Version 1.21
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * based on RR's ip_conntrack_ftp.c
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+static unsigned int max_dcc_channels = 8;
+static unsigned int dcc_timeout __read_mostly = 300;
+/* This is slow, but it's simple. --RR */
+static char *irc_buffer;
+static DEFINE_SPINLOCK(irc_buffer_lock);
+
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
+
+#define HELPER_NAME "irc"
+#define MAX_SEARCH_SIZE 4095
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_irc");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+module_param(max_dcc_channels, uint, 0400);
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per "
+ "IRC session");
+module_param(dcc_timeout, uint, 0400);
+MODULE_PARM_DESC(dcc_timeout, "timeout for unestablished DCC channels");
+
+static const char *const dccprotos[] = {
+ "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT "
+};
+
+#define MINMATCHLEN 5
+
+/* Tries to get the IP address and port out of a DCC command.
+ * Return value: -1 on failure, 0 on success.
+ *	data		pointer to first byte of DCC command data
+ *	data_end	pointer to last byte of DCC command data
+ *	ip		returns parsed IP of DCC command
+ *	port		returns parsed port of DCC command
+ *	ad_beg_p	returns pointer to first byte of addr data
+ *	ad_end_p	returns pointer to last byte of addr data
+ */
+static int parse_dcc(char *data, const char *data_end, __be32 *ip,
+ u_int16_t *port, char **ad_beg_p, char **ad_end_p)
+{
+ char *tmp;
+
+ /* at least 12: "AAAAAAAA P\1\n" */
+ while (*data++ != ' ')
+ if (data > data_end - 12)
+ return -1;
+
+ /* Make sure we have a newline character within the packet boundaries
+ * because simple_strtoul parses until the first invalid character. */
+ for (tmp = data; tmp <= data_end; tmp++)
+ if (*tmp == '\n')
+ break;
+ if (tmp > data_end || *tmp != '\n')
+ return -1;
+
+ *ad_beg_p = data;
+ *ip = cpu_to_be32(simple_strtoul(data, &data, 10));
+
+ /* skip blanks between ip and port */
+ while (*data == ' ') {
+ if (data >= data_end)
+ return -1;
+ data++;
+ }
+
+ *port = simple_strtoul(data, &data, 10);
+ *ad_end_p = data;
+
+ return 0;
+}
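As a worked example of what parse_dcc() extracts: in a request such as "PRIVMSG buddy :\1DCC SEND file 3232235521 8000\1\n", the address is the host-order decimal 3232235521 (192.168.0.1) and the port follows after optional blanks. A stand-alone user-space sketch of the same walk, using libc strtoul in place of simple_strtoul:

#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>

int main(void)
{
	/* payload after "\1DCC SEND ": "<argument> <ip> <port>\1\n" */
	char buf[] = "file 3232235521 8000\1\n";
	char *p = buf;

	while (*p++ != ' ')	/* skip the argument before the address */
		;
	unsigned long ip = strtoul(p, &p, 10);	/* host-order IPv4 address */
	while (*p == ' ')	/* skip blanks between ip and port */
		p++;
	unsigned long port = strtoul(p, &p, 10);

	struct in_addr a = { .s_addr = htonl(ip) };
	printf("%s:%lu\n", inet_ntoa(a), port);	/* prints 192.168.0.1:8000 */
	return 0;
}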
+
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff;
+ const struct iphdr *iph;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ const char *data_limit;
+ char *data, *ib_ptr;
+ int dir = CTINFO2DIR(ctinfo);
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ __be32 dcc_ip;
+ u_int16_t dcc_port;
+ __be16 port;
+ int i, ret = NF_ACCEPT;
+ char *addr_beg_p, *addr_end_p;
+ typeof(nf_nat_irc_hook) nf_nat_irc;
+ unsigned int datalen;
+
+ /* If packet is coming from IRC server */
+ if (dir == IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+ /* Not a full tcp header? */
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ /* No data? */
+ dataoff = protoff + th->doff*4;
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+ datalen = skb->len - dataoff;
+ if (datalen > MAX_SEARCH_SIZE)
+ datalen = MAX_SEARCH_SIZE;
+
+ spin_lock_bh(&irc_buffer_lock);
+ ib_ptr = skb_header_pointer(skb, dataoff, datalen,
+ irc_buffer);
+ if (!ib_ptr) {
+ spin_unlock_bh(&irc_buffer_lock);
+ return NF_ACCEPT;
+ }
+
+ data = ib_ptr;
+ data_limit = ib_ptr + datalen;
+
+ /* Skip any whitespace */
+ while (data < data_limit - 10) {
+ if (*data == ' ' || *data == '\r' || *data == '\n')
+ data++;
+ else
+ break;
+ }
+
+ /* strlen("PRIVMSG x ")=10 */
+ if (data < data_limit - 10) {
+ if (strncasecmp("PRIVMSG ", data, 8))
+ goto out;
+ data += 8;
+ }
+
+ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26
+ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26
+ */
+ while (data < data_limit - (21 + MINMATCHLEN)) {
+ /* Find first " :", the start of message */
+ if (memcmp(data, " :", 2)) {
+ data++;
+ continue;
+ }
+ data += 2;
+
+ /* then check that place only for the DCC command */
+ if (memcmp(data, "\1DCC ", 5))
+ goto out;
+ data += 5;
+		/* we have at least (21+MINMATCHLEN)-(2+5) bytes of valid data left */
+
+ iph = ip_hdr(skb);
+ pr_debug("DCC found in master %pI4:%u %pI4:%u\n",
+ &iph->saddr, ntohs(th->source),
+ &iph->daddr, ntohs(th->dest));
+
+ for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
+ if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
+ /* no match */
+ continue;
+ }
+ data += strlen(dccprotos[i]);
+ pr_debug("DCC %s detected\n", dccprotos[i]);
+
+			/* we have at least
+			 * (21+MINMATCHLEN)-7-strlen(dccprotos[i]) bytes of
+			 * valid data left (== 14/13 bytes) */
+ if (parse_dcc(data, data_limit, &dcc_ip,
+ &dcc_port, &addr_beg_p, &addr_end_p)) {
+ pr_debug("unable to parse dcc command\n");
+ continue;
+ }
+
+ pr_debug("DCC bound ip/port: %pI4:%u\n",
+ &dcc_ip, dcc_port);
+
+ /* dcc_ip can be the internal OR external (NAT'ed) IP */
+ tuple = &ct->tuplehash[dir].tuple;
+ if ((tuple->src.u3.ip != dcc_ip &&
+ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) ||
+ dcc_port == 0) {
+ net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
+ &tuple->src.u3.ip,
+ &dcc_ip, dcc_port);
+ continue;
+ }
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ nf_ct_helper_log(skb, ct,
+ "cannot alloc expectation");
+ ret = NF_DROP;
+ goto out;
+ }
+ tuple = &ct->tuplehash[!dir].tuple;
+ port = htons(dcc_port);
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+ tuple->src.l3num,
+ NULL, &tuple->dst.u3,
+ IPPROTO_TCP, NULL, &port);
+
+ nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
+ if (nf_nat_irc && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_irc(skb, ctinfo, protoff,
+ addr_beg_p - ib_ptr,
+ addr_end_p - addr_beg_p,
+ exp);
+ else if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct,
+ "cannot add expectation");
+ ret = NF_DROP;
+ }
+ nf_ct_expect_put(exp);
+ goto out;
+ }
+ }
+ out:
+ spin_unlock_bh(&irc_buffer_lock);
+ return ret;
+}
+
+static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
+static struct nf_conntrack_expect_policy irc_exp_policy;
+
+static int __init nf_conntrack_irc_init(void)
+{
+ int i, ret;
+
+ if (max_dcc_channels < 1) {
+ pr_err("max_dcc_channels must not be zero\n");
+ return -EINVAL;
+ }
+
+ if (max_dcc_channels > NF_CT_EXPECT_MAX_CNT) {
+ pr_err("max_dcc_channels must not be more than %u\n",
+ NF_CT_EXPECT_MAX_CNT);
+ return -EINVAL;
+ }
+
+ irc_exp_policy.max_expected = max_dcc_channels;
+ irc_exp_policy.timeout = dcc_timeout;
+
+ irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL);
+ if (!irc_buffer)
+ return -ENOMEM;
+
+ /* If no port given, default to standard irc port */
+ if (ports_c == 0)
+ ports[ports_c++] = IRC_PORT;
+
+ for (i = 0; i < ports_c; i++) {
+ nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, HELPER_NAME,
+ IRC_PORT, ports[i], i, &irc_exp_policy,
+ 0, help, NULL, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(&irc[0], ports_c);
+ if (ret) {
+ pr_err("failed to register helpers\n");
+ kfree(irc_buffer);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit nf_conntrack_irc_fini(void)
+{
+ nf_conntrack_helpers_unregister(irc, ports_c);
+ kfree(irc_buffer);
+}
+
+module_init(nf_conntrack_irc_init);
+module_exit(nf_conntrack_irc_fini);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
new file mode 100644
index 000000000..6e70e137a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * test/set flag bits stored in conntrack extension area.
+ *
+ * (C) 2013 Astaro GmbH & Co KG
+ */
+
+#include <linux/export.h>
+#include <linux/types.h>
+
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+
+static DEFINE_SPINLOCK(nf_connlabels_lock);
+
+static int replace_u32(u32 *address, u32 mask, u32 new)
+{
+ u32 old, tmp;
+
+ do {
+ old = *address;
+ tmp = (old & mask) ^ new;
+ if (old == tmp)
+ return 0;
+ } while (cmpxchg(address, old, tmp) != old);
+
+ return 1;
+}
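The update rule tmp = (old & mask) ^ new keeps the bits selected by mask and XORs the new value in; since nf_connlabels_replace() below passes mask = ~m for a user-supplied mask m, bits inside m are replaced and bits outside it are preserved. One iteration of that arithmetic in stand-alone form (a plain store instead of the cmpxchg() retry loop):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t old  = 0xff00ff00;	/* current label word */
	uint32_t m    = 0x0000ffff;	/* user mask: bits to replace */
	uint32_t new_ = 0x00001234;	/* replacement bits, confined to m */

	/* replace_u32() is invoked with mask = ~m and new = new_ */
	uint32_t tmp = (old & ~m) ^ new_;

	printf("%08x -> %08x\n", old, tmp);	/* ff00ff00 -> ff001234 */
	return 0;
}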
+
+int nf_connlabels_replace(struct nf_conn *ct,
+ const u32 *data,
+ const u32 *mask, unsigned int words32)
+{
+ struct nf_conn_labels *labels;
+ unsigned int size, i;
+ int changed = 0;
+ u32 *dst;
+
+ labels = nf_ct_labels_find(ct);
+ if (!labels)
+ return -ENOSPC;
+
+ size = sizeof(labels->bits);
+ if (size < (words32 * sizeof(u32)))
+ words32 = size / sizeof(u32);
+
+ dst = (u32 *) labels->bits;
+ for (i = 0; i < words32; i++)
+ changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]);
+
+ size /= sizeof(u32);
+ for (i = words32; i < size; i++) /* pad */
+ replace_u32(&dst[i], 0, 0);
+
+ if (changed)
+ nf_conntrack_event_cache(IPCT_LABEL, ct);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_replace);
+
+int nf_connlabels_get(struct net *net, unsigned int bits)
+{
+ if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
+ return -ERANGE;
+
+ spin_lock(&nf_connlabels_lock);
+ net->ct.labels_used++;
+ spin_unlock(&nf_connlabels_lock);
+
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_get);
+
+void nf_connlabels_put(struct net *net)
+{
+ spin_lock(&nf_connlabels_lock);
+ net->ct.labels_used--;
+ spin_unlock(&nf_connlabels_lock);
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_put);
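Users of the label area are expected to bracket their lifetime with nf_connlabels_get()/nf_connlabels_put(), as the in-tree connlabel match does. A hedged sketch of that lifecycle — the my_* names and the bit-0 example are illustrative only:

/* per-netns init of a hypothetical module that uses connlabel bits 0..31 */
static int my_labels_init(struct net *net)
{
	return nf_connlabels_get(net, 31);	/* highest bit index needed */
}

static void my_labels_exit(struct net *net)
{
	nf_connlabels_put(net);
}

/* later, from a hook that holds a struct nf_conn *ct: set label bit 0 */
static void my_mark_conn(struct nf_conn *ct)
{
	u32 bits = 0x1, mask = 0x1;

	nf_connlabels_replace(ct, &bits, &mask, 1);
}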
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
new file mode 100644
index 000000000..55415f011
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * NetBIOS name service broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ */
+/*
+ * This helper tracks locally originating NetBIOS name service
+ * requests by issuing permanent expectations (valid until
+ * timing out) matching all reply connections from the
+ * destination network. The only NetBIOS specific thing is
+ * actually the port number.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define HELPER_NAME "netbios-ns"
+#define NMBD_PORT 137
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_netbios_ns");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+static unsigned int timeout __read_mostly = 3;
+module_param(timeout, uint, 0400);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+static struct nf_conntrack_expect_policy exp_policy = {
+ .max_expected = 1,
+};
+
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ return nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
+}
+
+static struct nf_conntrack_helper helper __read_mostly = {
+ .name = HELPER_NAME,
+ .tuple.src.l3num = NFPROTO_IPV4,
+ .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .help = netbios_ns_help,
+ .expect_policy = &exp_policy,
+};
+
+static int __init nf_conntrack_netbios_ns_init(void)
+{
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
+ exp_policy.timeout = timeout;
+ return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_netbios_ns_fini(void)
+{
+ nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_netbios_ns_init);
+module_exit(nf_conntrack_netbios_ns_fini);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
new file mode 100644
index 000000000..9ee8abd3e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -0,0 +1,3918 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general troublemaking from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick McHardy <kaber@trash.net>
+ * (C) 2005-2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Initial connection tracking via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rculist.h>
+#include <linux/rculist_nulls.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/security.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/siphash.h>
+
+#include <linux/netfilter.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#endif
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+#include "nf_internals.h"
+
+MODULE_LICENSE("GPL");
+
+struct ctnetlink_list_dump_ctx {
+ struct nf_conn *last;
+ unsigned int cpu;
+ bool done;
+};
+
+static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
+{
+ int ret = 0;
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum))
+ goto nla_put_failure;
+
+ if (likely(l4proto->tuple_to_nlattr))
+ ret = l4proto->tuple_to_nlattr(skb, tuple);
+
+ nla_nest_end(skb, nest_parms);
+
+ return ret;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+ nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ int ret = 0;
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_IP);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ switch (tuple->src.l3num) {
+ case NFPROTO_IPV4:
+ ret = ipv4_tuple_to_nlattr(skb, tuple);
+ break;
+ case NFPROTO_IPV6:
+ ret = ipv6_tuple_to_nlattr(skb, tuple);
+ break;
+ }
+
+ nla_nest_end(skb, nest_parms);
+
+ return ret;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_tuples(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ int ret;
+
+ rcu_read_lock();
+ ret = ctnetlink_dump_tuples_ip(skb, tuple);
+
+ if (ret >= 0) {
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
+ ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static int ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype,
+ const struct nf_conntrack_zone *zone, int dir)
+{
+ if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir)
+ return 0;
+ if (nla_put_be16(skb, attrtype, htons(zone->id)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct,
+ bool skip_zero)
+{
+ long timeout;
+
+ if (nf_ct_is_confirmed(ct))
+ timeout = nf_ct_expires(ct) / HZ;
+ else
+ timeout = ct->timeout / HZ;
+
+ if (skip_zero && timeout == 0)
+ return 0;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct,
+ bool destroy)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nlattr *nest_proto;
+ int ret;
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ if (!l4proto->to_nlattr)
+ return 0;
+
+ nest_proto = nla_nest_start(skb, CTA_PROTOINFO);
+ if (!nest_proto)
+ goto nla_put_failure;
+
+ ret = l4proto->to_nlattr(skb, nest_proto, ct, destroy);
+
+ nla_nest_end(skb, nest_proto);
+
+ return ret;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
+ const struct nf_conn *ct)
+{
+ struct nlattr *nest_helper;
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+
+ if (!help)
+ return 0;
+
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out;
+
+ nest_helper = nla_nest_start(skb, CTA_HELP);
+ if (!nest_helper)
+ goto nla_put_failure;
+ if (nla_put_string(skb, CTA_HELP_NAME, helper->name))
+ goto nla_put_failure;
+
+ if (helper->to_nlattr)
+ helper->to_nlattr(skb, ct);
+
+ nla_nest_end(skb, nest_helper);
+out:
+ rcu_read_unlock();
+ return 0;
+
+nla_put_failure:
+ rcu_read_unlock();
+ return -1;
+}
+
+static int
+dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct,
+ enum ip_conntrack_dir dir, int type)
+{
+	enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY : CTA_COUNTERS_ORIG;
+ struct nf_conn_counter *counter = acct->counter;
+ struct nlattr *nest_count;
+ u64 pkts, bytes;
+
+ if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&counter[dir].packets, 0);
+ bytes = atomic64_xchg(&counter[dir].bytes, 0);
+ } else {
+ pkts = atomic64_read(&counter[dir].packets);
+ bytes = atomic64_read(&counter[dir].bytes);
+ }
+
+ nest_count = nla_nest_start(skb, attr);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts),
+ CTA_COUNTERS_PAD) ||
+ nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes),
+ CTA_COUNTERS_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_count);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
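For IPCTNL_MSG_CT_GET_CTRZERO, atomic64_xchg() reads and zeroes each counter in a single atomic step, so packets accounted between the read and the reset cannot be lost. The same read-and-clear idiom in portable C11 atomics, as a stand-alone illustration:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_ullong pkts = 41;

	atomic_fetch_add(&pkts, 1);	/* packet path: accounting */

	/* dump path: fetch the current value and reset to 0 atomically */
	unsigned long long seen = atomic_exchange(&pkts, 0);

	printf("dumped %llu, counter now %llu\n",
	       seen, (unsigned long long)atomic_load(&pkts));
	return 0;
}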
+
+static int
+ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type)
+{
+ struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+
+ if (!acct)
+ return 0;
+
+ if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0)
+ return -1;
+ if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_count;
+ const struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (!tstamp)
+ return 0;
+
+ nest_count = nla_nest_start(skb, CTA_TIMESTAMP);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start),
+ CTA_TIMESTAMP_PAD) ||
+ (tstamp->stop != 0 && nla_put_be64(skb, CTA_TIMESTAMP_STOP,
+ cpu_to_be64(tstamp->stop),
+ CTA_TIMESTAMP_PAD)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_count);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct,
+ bool dump)
+{
+ u32 mark = READ_ONCE(ct->mark);
+
+ if (!mark && !dump)
+ return 0;
+
+ if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+#else
+#define ctnetlink_dump_mark(a, b, c) (0)
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_secctx;
+ int len, ret;
+ char *secctx;
+
+ ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
+ if (ret)
+ return 0;
+
+ ret = -1;
+ nest_secctx = nla_nest_start(skb, CTA_SECCTX);
+ if (!nest_secctx)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, CTA_SECCTX_NAME, secctx))
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_secctx);
+
+ ret = 0;
+nla_put_failure:
+ security_release_secctx(secctx, len);
+ return ret;
+}
+#else
+#define ctnetlink_dump_secctx(a, b) (0)
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+static inline int ctnetlink_label_size(const struct nf_conn *ct)
+{
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+
+ if (!labels)
+ return 0;
+ return nla_total_size(sizeof(labels->bits));
+}
+
+static int
+ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+ unsigned int i;
+
+ if (!labels)
+ return 0;
+
+ i = 0;
+ do {
+ if (labels->bits[i] != 0)
+ return nla_put(skb, CTA_LABELS, sizeof(labels->bits),
+ labels->bits);
+ i++;
+ } while (i < ARRAY_SIZE(labels->bits));
+
+ return 0;
+}
+#else
+#define ctnetlink_dump_labels(a, b) (0)
+#define ctnetlink_label_size(a) (0)
+#endif
+
+#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+
+static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_parms;
+
+ if (!(ct->status & IPS_EXPECTED))
+ return 0;
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int
+dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, type);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
+ htonl(seq->correction_pos)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
+ htonl(seq->offset_before)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
+ htonl(seq->offset_after)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
+{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *seq;
+
+ if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
+ return 0;
+
+ spin_lock_bh(&ct->lock);
+ seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
+ goto err;
+
+ seq = &seqadj->seq[IP_CT_DIR_REPLY];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
+ goto err;
+
+ spin_unlock_bh(&ct->lock);
+ return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return -1;
+}
+
+static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct)
+{
+ struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+ struct nlattr *nest_parms;
+
+ if (!synproxy)
+ return 0;
+
+ nest_parms = nla_nest_start(skb, CTA_SYNPROXY);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, CTA_SYNPROXY_ISN, htonl(synproxy->isn)) ||
+ nla_put_be32(skb, CTA_SYNPROXY_ITS, htonl(synproxy->its)) ||
+ nla_put_be32(skb, CTA_SYNPROXY_TSOFF, htonl(synproxy->tsoff)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+ if (nla_put_be32(skb, CTA_ID, id))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/* all these functions access ct->ext. Caller must either hold a reference
+ * on ct or prevent its deletion by holding either the bucket spinlock or
+ * pcpu dying list lock.
+ */
+static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ struct nf_conn *ct, u32 type)
+{
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
+ ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+ ctnetlink_dump_labels(skb, ct) < 0 ||
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+{
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, ct, true) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+ ctnetlink_dump_master(skb, ct) < 0)
+ return -1;
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+ (ctnetlink_dump_timeout(skb, ct, false) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct, false) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ struct nf_conn *ct, bool extinfo, unsigned int flags)
+{
+ const struct nf_conntrack_zone *zone;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest_parms;
+ unsigned int event;
+
+ if (portid)
+ flags |= NLM_F_MULTI;
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ zone = nf_ct_zone(ct);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_info(skb, ct) < 0)
+ goto nla_put_failure;
+ if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
+ [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+ [CTA_IP_V4_DST] = { .type = NLA_U32 },
+ [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 },
+ [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 },
+};
+
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
+static size_t ctnetlink_proto_size(const struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len, len4 = 0;
+
+ len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
+ len *= 3u; /* ORIG, REPLY, MASTER */
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ len += l4proto->nlattr_size;
+ if (l4proto->nlattr_tuple_size) {
+ len4 = l4proto->nlattr_tuple_size();
+ len4 *= 3u; /* ORIG, REPLY, MASTER */
+ }
+
+ return len + len4;
+}
+#endif
+
+static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
+{
+ if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
+ return 0;
+ return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+ + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
+ ;
+}
+
+static inline int ctnetlink_secctx_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ int len, ret;
+
+ ret = security_secid_to_secctx(ct->secmark, NULL, &len);
+ if (ret)
+ return 0;
+
+ return nla_total_size(0) /* CTA_SECCTX */
+ + nla_total_size(sizeof(char) * len); /* CTA_SECCTX_NAME */
+#else
+ return 0;
+#endif
+}
+
+static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+ return 0;
+ return nla_total_size(0) + 2 * nla_total_size_64bit(sizeof(uint64_t));
+#else
+ return 0;
+#endif
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
+{
+ return NLMSG_ALIGN(sizeof(struct nfgenmsg))
+ + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+ + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
+ + ctnetlink_acct_size(ct)
+ + ctnetlink_timestamp_size(ct)
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ + nla_total_size(0) /* CTA_PROTOINFO */
+ + nla_total_size(0) /* CTA_HELP */
+ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ + ctnetlink_secctx_size(ct)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
+#endif
+ + ctnetlink_proto_size(ct)
+ + ctnetlink_label_size(ct)
+ ;
+}
+
+static int
+ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
+{
+ const struct nf_conntrack_zone *zone;
+ struct net *net;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest_parms;
+ struct nf_conn *ct = item->ct;
+ struct sk_buff *skb;
+ unsigned int type;
+ unsigned int flags = 0, group;
+ int err;
+
+ if (events & (1 << IPCT_DESTROY)) {
+ type = IPCTNL_MSG_CT_DELETE;
+ group = NFNLGRP_CONNTRACK_DESTROY;
+ } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
+ type = IPCTNL_MSG_CT_NEW;
+ flags = NLM_F_CREATE|NLM_F_EXCL;
+ group = NFNLGRP_CONNTRACK_NEW;
+ } else if (events) {
+ type = IPCTNL_MSG_CT_NEW;
+ group = NFNLGRP_CONNTRACK_UPDATE;
+ } else
+ return 0;
+
+ net = nf_ct_net(ct);
+ if (!item->report && !nfnetlink_has_listeners(net, group))
+ return 0;
+
+ skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ zone = nf_ct_zone(ct);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_id(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_status(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & (1 << IPCT_DESTROY)) {
+ if (ctnetlink_dump_timeout(skb, ct, true) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct, true) < 0)
+ goto nla_put_failure;
+ } else {
+ if (ctnetlink_dump_timeout(skb, ct, false) < 0)
+ goto nla_put_failure;
+
+ if (events & (1 << IPCT_PROTOINFO) &&
+ ctnetlink_dump_protoinfo(skb, ct, false) < 0)
+ goto nla_put_failure;
+
+ if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
+ && ctnetlink_dump_helpinfo(skb, ct) < 0)
+ goto nla_put_failure;
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if ((events & (1 << IPCT_SECMARK) || ct->secmark)
+ && ctnetlink_dump_secctx(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+ if (events & (1 << IPCT_LABEL) &&
+ ctnetlink_dump_labels(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & (1 << IPCT_RELATED) &&
+ ctnetlink_dump_master(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & (1 << IPCT_SEQADJ) &&
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & (1 << IPCT_SYNPROXY) &&
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ goto nla_put_failure;
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
+ goto nla_put_failure;
+#endif
+ nlmsg_end(skb, nlh);
+ err = nfnetlink_send(skb, net, item->portid, group, item->report,
+ GFP_ATOMIC);
+ if (err == -ENOBUFS || err == -EAGAIN)
+ return -ENOBUFS;
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+ kfree_skb(skb);
+errout:
+ if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0)
+ return -ENOBUFS;
+
+ return 0;
+}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+static int ctnetlink_done(struct netlink_callback *cb)
+{
+ if (cb->args[1])
+ nf_ct_put((struct nf_conn *)cb->args[1]);
+ kfree(cb->data);
+ return 0;
+}
+
+struct ctnetlink_filter_u32 {
+ u32 val;
+ u32 mask;
+};
+
+struct ctnetlink_filter {
+ u8 family;
+
+ u_int32_t orig_flags;
+ u_int32_t reply_flags;
+
+ struct nf_conntrack_tuple orig;
+ struct nf_conntrack_tuple reply;
+ struct nf_conntrack_zone zone;
+
+ struct ctnetlink_filter_u32 mark;
+ struct ctnetlink_filter_u32 status;
+};
+
+static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
+ [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 },
+ [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 },
+};
+
+static int ctnetlink_parse_filter(const struct nlattr *attr,
+ struct ctnetlink_filter *filter)
+{
+ struct nlattr *tb[CTA_FILTER_MAX + 1];
+ int ret = 0;
+
+ ret = nla_parse_nested(tb, CTA_FILTER_MAX, attr, cta_filter_nla_policy,
+ NULL);
+ if (ret)
+ return ret;
+
+ if (tb[CTA_FILTER_ORIG_FLAGS]) {
+ filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
+ if (filter->orig_flags & ~CTA_FILTER_F_ALL)
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[CTA_FILTER_REPLY_FLAGS]) {
+ filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
+ if (filter->reply_flags & ~CTA_FILTER_F_ALL)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int ctnetlink_parse_zone(const struct nlattr *attr,
+ struct nf_conntrack_zone *zone);
+static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+ struct nf_conntrack_tuple *tuple,
+ u32 type, u_int8_t l3num,
+ struct nf_conntrack_zone *zone,
+ u_int32_t flags);
+
+static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark,
+ const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK]) {
+ mark->val = ntohl(nla_get_be32(cda[CTA_MARK]));
+
+ if (cda[CTA_MARK_MASK])
+ mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+ else
+ mark->mask = 0xffffffff;
+ } else if (cda[CTA_MARK_MASK]) {
+ return -EINVAL;
+ }
+#endif
+ return 0;
+}
+
+static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status,
+ const struct nlattr * const cda[])
+{
+ if (cda[CTA_STATUS]) {
+ status->val = ntohl(nla_get_be32(cda[CTA_STATUS]));
+ if (cda[CTA_STATUS_MASK])
+ status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK]));
+ else
+ status->mask = status->val;
+
+		/* a zero mask would match everything (val == 0) or
+		 * nothing (val != 0), so reject it
+		 */
+ if (status->mask == 0)
+ return -EINVAL;
+ } else if (cda[CTA_STATUS_MASK]) {
+ return -EINVAL;
+ }
+
+ /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */
+ BUILD_BUG_ON(__IPS_MAX_BIT >= 32);
+ return 0;
+}
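Both filters default their mask in the same spirit: a mark without CTA_MARK_MASK matches exactly (all-ones mask), while a status without CTA_STATUS_MASK masks on its own value, i.e. "these bits must be set". A small user-space check of the status rule, with IPS_ASSURED and the sample status words chosen for illustration:

#include <stdio.h>
#include <stdint.h>

#define IPS_ASSURED (1 << 2)	/* from the conntrack UAPI, IPS_ASSURED_BIT = 2 */

int main(void)
{
	uint32_t val  = IPS_ASSURED;
	uint32_t mask = val;	/* CTA_STATUS given without CTA_STATUS_MASK */

	uint32_t ct_status[] = { 0x10e, 0x108 };	/* with/without ASSURED */
	for (int i = 0; i < 2; i++)
		printf("status %#x %s\n", ct_status[i],
		       (ct_status[i] & mask) == val ? "matches" : "ignored");
	return 0;
}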
+
+static struct ctnetlink_filter *
+ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
+{
+ struct ctnetlink_filter *filter;
+ int err;
+
+#ifndef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK] || cda[CTA_MARK_MASK])
+ return ERR_PTR(-EOPNOTSUPP);
+#endif
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ filter->family = family;
+
+ err = ctnetlink_filter_parse_mark(&filter->mark, cda);
+ if (err)
+ goto err_filter;
+
+ err = ctnetlink_filter_parse_status(&filter->status, cda);
+ if (err)
+ goto err_filter;
+
+ if (!cda[CTA_FILTER])
+ return filter;
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+
+ err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
+ if (err < 0)
+ goto err_filter;
+
+ if (filter->orig_flags) {
+ if (!cda[CTA_TUPLE_ORIG]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
+
+ err = ctnetlink_parse_tuple_filter(cda, &filter->orig,
+ CTA_TUPLE_ORIG,
+ filter->family,
+ &filter->zone,
+ filter->orig_flags);
+ if (err < 0)
+ goto err_filter;
+ }
+
+ if (filter->reply_flags) {
+ if (!cda[CTA_TUPLE_REPLY]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
+
+ err = ctnetlink_parse_tuple_filter(cda, &filter->reply,
+ CTA_TUPLE_REPLY,
+ filter->family,
+ &filter->zone,
+ filter->reply_flags);
+ if (err < 0)
+ goto err_filter;
+ }
+
+ return filter;
+
+err_filter:
+ kfree(filter);
+
+ return ERR_PTR(err);
+}
+
+static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
+{
+ return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
+}
+
+static int ctnetlink_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *cda = cb->data;
+ struct ctnetlink_filter *filter = NULL;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 family = nfmsg->nfgen_family;
+
+ if (ctnetlink_needs_filter(family, cda)) {
+ filter = ctnetlink_alloc_filter(cda, family);
+ if (IS_ERR(filter))
+ return PTR_ERR(filter);
+ }
+
+ cb->data = filter;
+ return 0;
+}
+
+static int ctnetlink_filter_match_tuple(struct nf_conntrack_tuple *filter_tuple,
+ struct nf_conntrack_tuple *ct_tuple,
+ u_int32_t flags, int family)
+{
+ switch (family) {
+ case NFPROTO_IPV4:
+ if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+ filter_tuple->src.u3.ip != ct_tuple->src.u3.ip)
+ return 0;
+
+ if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+ filter_tuple->dst.u3.ip != ct_tuple->dst.u3.ip)
+ return 0;
+ break;
+ case NFPROTO_IPV6:
+ if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+ !ipv6_addr_cmp(&filter_tuple->src.u3.in6,
+ &ct_tuple->src.u3.in6))
+ return 0;
+
+ if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+ !ipv6_addr_cmp(&filter_tuple->dst.u3.in6,
+ &ct_tuple->dst.u3.in6))
+ return 0;
+ break;
+ }
+
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) &&
+ filter_tuple->dst.protonum != ct_tuple->dst.protonum)
+ return 0;
+
+ switch (ct_tuple->dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) &&
+ filter_tuple->src.u.tcp.port != ct_tuple->src.u.tcp.port)
+ return 0;
+
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) &&
+ filter_tuple->dst.u.tcp.port != ct_tuple->dst.u.tcp.port)
+ return 0;
+ break;
+ case IPPROTO_ICMP:
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) &&
+ filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+ return 0;
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) &&
+ filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+ return 0;
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) &&
+ filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+ return 0;
+ break;
+ case IPPROTO_ICMPV6:
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) &&
+ filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+ return 0;
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) &&
+ filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+ return 0;
+ if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) &&
+ filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+ return 0;
+ break;
+ }
+
+ return 1;
+}
+
+static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
+{
+ struct ctnetlink_filter *filter = data;
+ struct nf_conntrack_tuple *tuple;
+ u32 status;
+
+ if (filter == NULL)
+ goto out;
+
+ /* Match entries of a given L3 protocol number.
+ * If it is not specified, ie. l3proto == 0,
+	 * If it is not specified, i.e. l3proto == 0,
+ */
+ if (filter->family && nf_ct_l3num(ct) != filter->family)
+ goto ignore_entry;
+
+ if (filter->orig_flags) {
+ tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
+ if (!ctnetlink_filter_match_tuple(&filter->orig, tuple,
+ filter->orig_flags,
+ filter->family))
+ goto ignore_entry;
+ }
+
+ if (filter->reply_flags) {
+ tuple = nf_ct_tuple(ct, IP_CT_DIR_REPLY);
+ if (!ctnetlink_filter_match_tuple(&filter->reply, tuple,
+ filter->reply_flags,
+ filter->family))
+ goto ignore_entry;
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val)
+ goto ignore_entry;
+#endif
+ status = (u32)READ_ONCE(ct->status);
+ if ((status & filter->status.mask) != filter->status.val)
+ goto ignore_entry;
+
+out:
+ return 1;
+
+ignore_entry:
+ return 0;
+}
+
+static int
+ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0;
+ struct net *net = sock_net(skb->sk);
+ struct nf_conn *ct, *last;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct nf_conn *nf_ct_evict[8];
+ int res, i;
+ spinlock_t *lockp;
+
+ last = (struct nf_conn *)cb->args[1];
+ i = 0;
+
+ local_bh_disable();
+ for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
+restart:
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
+ lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
+ nf_conntrack_lock(lockp);
+ if (cb->args[0] >= nf_conntrack_htable_size) {
+ spin_unlock(lockp);
+ goto out;
+ }
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
+ hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ /* need to defer nf_ct_kill() until lock is released */
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+ refcount_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+
+ if (!net_eq(net, nf_ct_net(ct)))
+ continue;
+
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+
+ if (cb->args[1]) {
+ if (ct != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (!ctnetlink_filter_match(ct, cb->data))
+ continue;
+
+ res =
+ ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, true, flags);
+ if (res < 0) {
+ nf_conntrack_get(&ct->ct_general);
+ cb->args[1] = (unsigned long)ct;
+ spin_unlock(lockp);
+ goto out;
+ }
+ }
+ spin_unlock(lockp);
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ }
+out:
+ local_bh_enable();
+ if (last) {
+ /* nf ct hash resize happened, now clear the leftover. */
+ if ((struct nf_conn *)cb->args[1] == last)
+ cb->args[1] = 0;
+
+ nf_ct_put(last);
+ }
+
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
+ return skb->len;
+}
+
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t,
+ u_int32_t flags)
+{
+ if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+ if (!tb[CTA_IP_V4_SRC])
+ return -EINVAL;
+
+ t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+ if (!tb[CTA_IP_V4_DST])
+ return -EINVAL;
+
+ t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+ }
+
+ return 0;
+}
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t,
+ u_int32_t flags)
+{
+ if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+ if (!tb[CTA_IP_V6_SRC])
+ return -EINVAL;
+
+ t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+ if (!tb[CTA_IP_V6_DST])
+ return -EINVAL;
+
+ t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+ }
+
+ return 0;
+}
+
+static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
+ struct nf_conntrack_tuple *tuple,
+ u_int32_t flags)
+{
+ struct nlattr *tb[CTA_IP_MAX+1];
+ int ret = 0;
+
+ ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
+ cta_ip_nla_policy, NULL);
+ if (ret)
+ return ret;
+
+ switch (tuple->src.l3num) {
+ case NFPROTO_IPV4:
+ ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
+ break;
+ case NFPROTO_IPV6:
+ ret = ipv6_nlattr_to_tuple(tb, tuple, flags);
+ break;
+ }
+
+ return ret;
+}
+
+static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_NUM] = { .type = NLA_U8 },
+};
+
+static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
+ struct nf_conntrack_tuple *tuple,
+ u_int32_t flags)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nlattr *tb[CTA_PROTO_MAX+1];
+ int ret = 0;
+
+ ret = nla_parse_nested_deprecated(tb, CTA_PROTO_MAX, attr,
+ proto_nla_policy, NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!(flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)))
+ return 0;
+
+ if (!tb[CTA_PROTO_NUM])
+ return -EINVAL;
+
+ tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
+
+ rcu_read_lock();
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
+
+ if (likely(l4proto->nlattr_to_tuple)) {
+ ret = nla_validate_nested_deprecated(attr, CTA_PROTO_MAX,
+ l4proto->nla_policy,
+ NULL);
+ if (ret == 0)
+ ret = l4proto->nlattr_to_tuple(tb, tuple, flags);
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int
+ctnetlink_parse_zone(const struct nlattr *attr,
+ struct nf_conntrack_zone *zone)
+{
+ nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID,
+ NF_CT_DEFAULT_ZONE_DIR, 0);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (attr)
+ zone->id = ntohs(nla_get_be16(attr));
+#else
+ if (attr)
+ return -EOPNOTSUPP;
+#endif
+ return 0;
+}
+
+static int
+ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type,
+ struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ if (zone->id != NF_CT_DEFAULT_ZONE_ID)
+ return -EINVAL;
+
+ ret = ctnetlink_parse_zone(attr, zone);
+ if (ret < 0)
+ return ret;
+
+ if (type == CTA_TUPLE_REPLY)
+ zone->dir = NF_CT_ZONE_DIR_REPL;
+ else
+ zone->dir = NF_CT_ZONE_DIR_ORIG;
+
+ return 0;
+}
+
+static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
+ [CTA_TUPLE_IP] = { .type = NLA_NESTED },
+ [CTA_TUPLE_PROTO] = { .type = NLA_NESTED },
+ [CTA_TUPLE_ZONE] = { .type = NLA_U16 },
+};
+
+#define CTA_FILTER_F_ALL_CTA_PROTO \
+ (CTA_FILTER_F_CTA_PROTO_SRC_PORT | \
+ CTA_FILTER_F_CTA_PROTO_DST_PORT | \
+ CTA_FILTER_F_CTA_PROTO_ICMP_TYPE | \
+ CTA_FILTER_F_CTA_PROTO_ICMP_CODE | \
+ CTA_FILTER_F_CTA_PROTO_ICMP_ID | \
+ CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \
+ CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \
+ CTA_FILTER_F_CTA_PROTO_ICMPV6_ID)
+
+static int
+ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+ struct nf_conntrack_tuple *tuple, u32 type,
+ u_int8_t l3num, struct nf_conntrack_zone *zone,
+ u_int32_t flags)
+{
+ struct nlattr *tb[CTA_TUPLE_MAX+1];
+ int err;
+
+ memset(tuple, 0, sizeof(*tuple));
+
+ err = nla_parse_nested_deprecated(tb, CTA_TUPLE_MAX, cda[type],
+ tuple_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6)
+ return -EOPNOTSUPP;
+ tuple->src.l3num = l3num;
+
+ if (flags & CTA_FILTER_FLAG(CTA_IP_DST) ||
+ flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+ if (!tb[CTA_TUPLE_IP])
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple, flags);
+ if (err < 0)
+ return err;
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) {
+ if (!tb[CTA_TUPLE_PROTO])
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple, flags);
+ if (err < 0)
+ return err;
+ } else if (flags & CTA_FILTER_FLAG(ALL_CTA_PROTO)) {
+ /* Can't manage proto flags without a protonum */
+ return -EINVAL;
+ }
+
+ if ((flags & CTA_FILTER_FLAG(CTA_TUPLE_ZONE)) && tb[CTA_TUPLE_ZONE]) {
+ if (!zone)
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE],
+ type, zone);
+ if (err < 0)
+ return err;
+ }
+
+ /* orig and expect tuples get DIR_ORIGINAL */
+ if (type == CTA_TUPLE_REPLY)
+ tuple->dst.dir = IP_CT_DIR_REPLY;
+ else
+ tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+ return 0;
+}
+
+static int
+ctnetlink_parse_tuple(const struct nlattr * const cda[],
+ struct nf_conntrack_tuple *tuple, u32 type,
+ u_int8_t l3num, struct nf_conntrack_zone *zone)
+{
+ return ctnetlink_parse_tuple_filter(cda, tuple, type, l3num, zone,
+ CTA_FILTER_FLAG(ALL));
+}
+
+static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
+ [CTA_HELP_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+};
+
+static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
+ struct nlattr **helpinfo)
+{
+ int err;
+ struct nlattr *tb[CTA_HELP_MAX+1];
+
+ err = nla_parse_nested_deprecated(tb, CTA_HELP_MAX, attr,
+ help_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_HELP_NAME])
+ return -EINVAL;
+
+ *helper_name = nla_data(tb[CTA_HELP_NAME]);
+
+ if (tb[CTA_HELP_INFO])
+ *helpinfo = tb[CTA_HELP_INFO];
+
+ return 0;
+}
+
+static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
+ [CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
+ [CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
+ [CTA_STATUS] = { .type = NLA_U32 },
+ [CTA_PROTOINFO] = { .type = NLA_NESTED },
+ [CTA_HELP] = { .type = NLA_NESTED },
+ [CTA_NAT_SRC] = { .type = NLA_NESTED },
+ [CTA_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_MARK] = { .type = NLA_U32 },
+ [CTA_ID] = { .type = NLA_U32 },
+ [CTA_NAT_DST] = { .type = NLA_NESTED },
+ [CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
+ [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NLA_NESTED },
+ [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED },
+ [CTA_ZONE] = { .type = NLA_U16 },
+ [CTA_MARK_MASK] = { .type = NLA_U32 },
+ [CTA_LABELS] = { .type = NLA_BINARY,
+ .len = NF_CT_LABELS_MAX_SIZE },
+ [CTA_LABELS_MASK] = { .type = NLA_BINARY,
+ .len = NF_CT_LABELS_MAX_SIZE },
+ [CTA_FILTER] = { .type = NLA_NESTED },
+ [CTA_STATUS_MASK] = { .type = NLA_U32 },
+};
+
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+ return ctnetlink_filter_match(ct, data);
+}
+
+static int ctnetlink_flush_conntrack(struct net *net,
+ const struct nlattr * const cda[],
+ u32 portid, int report, u8 family)
+{
+ struct ctnetlink_filter *filter = NULL;
+ struct nf_ct_iter_data iter = {
+ .net = net,
+ .portid = portid,
+ .report = report,
+ };
+
+ if (ctnetlink_needs_filter(family, cda)) {
+ if (cda[CTA_FILTER])
+ return -EOPNOTSUPP;
+
+ filter = ctnetlink_alloc_filter(cda, family);
+ if (IS_ERR(filter))
+ return PTR_ERR(filter);
+
+ iter.data = filter;
+ }
+
+ nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter);
+ kfree(filter);
+
+ return 0;
+}
+
+static int ctnetlink_del_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ u8 family = info->nfmsg->nfgen_family;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone zone;
+ struct nf_conn *ct;
+ int err;
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_TUPLE_ORIG])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ family, &zone);
+ else if (cda[CTA_TUPLE_REPLY])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ family, &zone);
+ else {
+ u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
+
+ return ctnetlink_flush_conntrack(info->net, cda,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh), u3);
+ }
+
+ if (err < 0)
+ return err;
+
+ h = nf_conntrack_find_get(info->net, &zone, &tuple);
+ if (!h)
+ return -ENOENT;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (cda[CTA_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_ID]);
+
+ if (id != (__force __be32)nf_ct_get_id(ct)) {
+ nf_ct_put(ct);
+ return -ENOENT;
+ }
+ }
+
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(info->nlh));
+ nf_ct_put(ct);
+
+ return 0;
+}
+
+static int ctnetlink_get_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ u_int8_t u3 = info->nfmsg->nfgen_family;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone zone;
+ struct sk_buff *skb2;
+ struct nf_conn *ct;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = ctnetlink_start,
+ .dump = ctnetlink_dump_table,
+ .done = ctnetlink_done,
+ .data = (void *)cda,
+ };
+
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_TUPLE_ORIG])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ u3, &zone);
+ else if (cda[CTA_TUPLE_REPLY])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ u3, &zone);
+ else
+ return -EINVAL;
+
+ if (err < 0)
+ return err;
+
+ h = nf_conntrack_find_get(info->net, &zone, &tuple);
+ if (!h)
+ return -ENOENT;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2) {
+ nf_ct_put(ct);
+ return -ENOMEM;
+ }
+
+ err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct,
+ true, 0);
+ nf_ct_put(ct);
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+}
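From user space this handler is usually driven through libnetfilter_conntrack or raw libmnl. A hedged libmnl sketch that issues the NLM_F_DUMP request this function hands to ctnetlink_dump_table() and counts the replies — error handling trimmed, and assuming libmnl is installed:

#include <stdio.h>
#include <time.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

static int cb(const struct nlmsghdr *nlh, void *data)
{
	(*(unsigned int *)data)++;	/* one IPCTNL_MSG_CT_NEW per entry */
	return MNL_CB_OK;
}

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	unsigned int seq = time(NULL), entries = 0;
	struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);

	mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);

	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	nlh->nlmsg_seq = seq;

	struct nfgenmsg *nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfh));
	nfh->nfgen_family = AF_INET;	/* dump filter: IPv4 entries only */
	nfh->version = NFNETLINK_V0;
	nfh->res_id = 0;

	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);

	int ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
	while (ret > 0) {
		ret = mnl_cb_run(buf, ret, seq, mnl_socket_get_portid(nl),
				 cb, &entries);
		if (ret <= MNL_CB_STOP)	/* NLMSG_DONE ends the dump */
			break;
		ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
	}

	printf("%u conntrack entries\n", entries);
	mnl_socket_close(nl);
	return 0;
}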
+
+static int ctnetlink_done_list(struct netlink_callback *cb)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->last)
+ nf_ct_put(ctx->last);
+
+ return 0;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nf_conn *ct,
+ bool dying)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 l3proto = nfmsg->nfgen_family;
+ int res;
+
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
+ return 0;
+
+ if (ctx->last) {
+ if (ct != ctx->last)
+ return 0;
+
+ ctx->last = NULL;
+ }
+
+	/* We can't dump extension info for the unconfirmed
+	 * list because unconfirmed conntracks can have
+	 * ct->ext reallocated (and thus freed).
+	 *
+	 * In the dying list case ct->ext can't be freed
+	 * until after we drop ecache_net->dying_lock.
+	 */
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying, 0);
+ if (res < 0) {
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return 0;
+
+ ctx->last = ct;
+ }
+
+ return res;
+}
+#endif
+
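+/* Unconfirmed entries can no longer be dumped: keep the handler as a
+ * no-op so that IPCTNL_MSG_CT_GET_UNCONFIRMED requests still succeed,
+ * just with an empty result.
+ */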
+static int
+ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return 0;
+}
+
+static int
+ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nf_conn *last = ctx->last;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ const struct net *net = sock_net(skb->sk);
+ struct nf_conntrack_net_ecache *ecache_net;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+#endif
+
+ if (ctx->done)
+ return 0;
+
+ ctx->last = NULL;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ ecache_net = nf_conn_pernet_ecache(net);
+ spin_lock_bh(&ecache_net->dying_lock);
+
+ hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) {
+ struct nf_conn *ct;
+ int res;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (last && last != ct)
+ continue;
+
+ res = ctnetlink_dump_one_entry(skb, cb, ct, true);
+ if (res < 0) {
+ spin_unlock_bh(&ecache_net->dying_lock);
+ nf_ct_put(last);
+ return skb->len;
+ }
+
+ nf_ct_put(last);
+ last = NULL;
+ }
+
+ spin_unlock_bh(&ecache_net->dying_lock);
+#endif
+ ctx->done = true;
+ nf_ct_put(last);
+
+ return skb->len;
+}
+
+static int ctnetlink_get_ct_dying(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_dump_dying,
+ .done = ctnetlink_done_list,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_dump_unconfirmed,
+ .done = ctnetlink_done_list,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ const struct nlattr *attr)
+ __must_hold(RCU)
+{
+ const struct nf_nat_hook *nat_hook;
+ int err;
+
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (!nat_hook) {
+#ifdef CONFIG_MODULES
+ rcu_read_unlock();
+ nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+ if (request_module("nf-nat") < 0) {
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+ rcu_read_lock();
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+ rcu_read_lock();
+ nat_hook = rcu_dereference(nf_nat_hook);
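+		/* module loaded and hook registered now: have nfnetlink
+		 * replay the request
+		 */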
+ if (nat_hook)
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+
+ err = nat_hook->parse_nat_setup(ct, manip, attr);
+ if (err == -EAGAIN) {
+#ifdef CONFIG_MODULES
+ rcu_read_unlock();
+ nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+ if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) {
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+ rcu_read_lock();
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+ rcu_read_lock();
+#else
+ err = -EOPNOTSUPP;
+#endif
+ }
+ return err;
+}
+#endif
+
+static int
+ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+ return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
+}
+
+static int
+ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+#if IS_ENABLED(CONFIG_NF_NAT)
+ int ret;
+
+ if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ return 0;
+
+ ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST,
+ cda[CTA_NAT_DST]);
+ if (ret < 0)
+ return ret;
+
+ return ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
+#else
+ if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ return 0;
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int ctnetlink_change_helper(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help = nfct_help(ct);
+ char *helpname = NULL;
+ struct nlattr *helpinfo = NULL;
+ int err;
+
+ err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo);
+ if (err < 0)
+ return err;
+
+ /* don't change helper of sibling connections */
+ if (ct->master) {
+ /* If we try to change the helper to the same thing twice,
+ * treat the second attempt as a no-op instead of returning
+ * an error.
+ */
+ err = -EBUSY;
+ if (help) {
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (helper && !strcmp(helper->name, helpname))
+ err = 0;
+ rcu_read_unlock();
+ }
+
+ return err;
+ }
+
+ if (!strcmp(helpname, "")) {
+ if (help && help->helper) {
+ /* we had a helper before ... */
+ nf_ct_remove_expectations(ct);
+ RCU_INIT_POINTER(help->helper, NULL);
+ }
+
+ return 0;
+ }
+
+ rcu_read_lock();
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL) {
+ rcu_read_unlock();
+ return -EOPNOTSUPP;
+ }
+
+ if (help) {
+ if (rcu_access_pointer(help->helper) == helper) {
+ /* update private helper data if allowed. */
+ if (helper->from_nlattr)
+ helper->from_nlattr(helpinfo, ct);
+ err = 0;
+ } else
+ err = -EBUSY;
+ } else {
+		/* adding a helper extension to a confirmed conntrack is
+		 * not possible, so a helper cannot be set after creation
+		 */
+ err = -EOPNOTSUPP;
+ }
+
+ rcu_read_unlock();
+ return err;
+}
+
+static int ctnetlink_change_timeout(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ);
+}
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+static void ctnetlink_change_mark(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ u32 mark, newmark, mask = 0;
+
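+	/* Bits set in CTA_MARK_MASK are cleared from the old mark and then
+	 * XOR'ed with CTA_MARK; without a mask the mark is replaced
+	 * wholesale. E.g. mark 0x00ff with CTA_MARK 0x1200 and
+	 * CTA_MARK_MASK 0xff00 yields 0x12ff.
+	 */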
+ if (cda[CTA_MARK_MASK])
+ mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+ mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ newmark = (READ_ONCE(ct->mark) & mask) ^ mark;
+ if (newmark != READ_ONCE(ct->mark))
+ WRITE_ONCE(ct->mark, newmark);
+}
+#endif
+
+static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
+ [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
+ [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
+ [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED },
+};
+
+static int ctnetlink_change_protoinfo(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nlattr *tb[CTA_PROTOINFO_MAX+1];
+ int err = 0;
+
+ err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_MAX, attr,
+ protoinfo_policy, NULL);
+ if (err < 0)
+ return err;
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ if (l4proto->from_nlattr)
+ err = l4proto->from_nlattr(tb, ct);
+
+ return err;
+}
+
+static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = {
+ [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 },
+};
+
+static int change_seq_adj(struct nf_ct_seqadj *seq,
+ const struct nlattr * const attr)
+{
+ int err;
+ struct nlattr *cda[CTA_SEQADJ_MAX+1];
+
+ err = nla_parse_nested_deprecated(cda, CTA_SEQADJ_MAX, attr,
+ seqadj_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!cda[CTA_SEQADJ_CORRECTION_POS])
+ return -EINVAL;
+
+ seq->correction_pos =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS]));
+
+ if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
+ return -EINVAL;
+
+ seq->offset_before =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
+
+ if (!cda[CTA_SEQADJ_OFFSET_AFTER])
+ return -EINVAL;
+
+ seq->offset_after =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
+
+ return 0;
+}
+
+static int
+ctnetlink_change_seq_adj(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ int ret = 0;
+
+ if (!seqadj)
+ return 0;
+
+ spin_lock_bh(&ct->lock);
+ if (cda[CTA_SEQ_ADJ_ORIG]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
+ cda[CTA_SEQ_ADJ_ORIG]);
+ if (ret < 0)
+ goto err;
+
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ }
+
+ if (cda[CTA_SEQ_ADJ_REPLY]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
+ cda[CTA_SEQ_ADJ_REPLY]);
+ if (ret < 0)
+ goto err;
+
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ }
+
+ spin_unlock_bh(&ct->lock);
+ return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return ret;
+}
+
+static const struct nla_policy synproxy_policy[CTA_SYNPROXY_MAX + 1] = {
+ [CTA_SYNPROXY_ISN] = { .type = NLA_U32 },
+ [CTA_SYNPROXY_ITS] = { .type = NLA_U32 },
+ [CTA_SYNPROXY_TSOFF] = { .type = NLA_U32 },
+};
+
+static int ctnetlink_change_synproxy(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+ struct nlattr *tb[CTA_SYNPROXY_MAX + 1];
+ int err;
+
+ if (!synproxy)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, CTA_SYNPROXY_MAX,
+ cda[CTA_SYNPROXY], synproxy_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_SYNPROXY_ISN] ||
+ !tb[CTA_SYNPROXY_ITS] ||
+ !tb[CTA_SYNPROXY_TSOFF])
+ return -EINVAL;
+
+ synproxy->isn = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ISN]));
+ synproxy->its = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ITS]));
+ synproxy->tsoff = ntohl(nla_get_be32(tb[CTA_SYNPROXY_TSOFF]));
+
+ return 0;
+}
+
+static int
+ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ size_t len = nla_len(cda[CTA_LABELS]);
+ const void *mask = cda[CTA_LABELS_MASK];
+
+ if (len & (sizeof(u32)-1)) /* must be multiple of u32 */
+ return -EINVAL;
+
+ if (mask) {
+ if (nla_len(cda[CTA_LABELS_MASK]) == 0 ||
+ nla_len(cda[CTA_LABELS_MASK]) != len)
+ return -EINVAL;
+ mask = nla_data(cda[CTA_LABELS_MASK]);
+ }
+
+ len /= sizeof(u32);
+
+ return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int
+ctnetlink_change_conntrack(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ int err;
+
+	/* only allow NAT changes and master assignment for new conntracks */
+ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER])
+ return -EOPNOTSUPP;
+
+ if (cda[CTA_HELP]) {
+ err = ctnetlink_change_helper(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_TIMEOUT]) {
+ err = ctnetlink_change_timeout(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_PROTOINFO]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK])
+ ctnetlink_change_mark(ct, cda);
+#endif
+
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_SYNPROXY]) {
+ err = ctnetlink_change_synproxy(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_LABELS]) {
+ err = ctnetlink_attach_labels(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct nf_conn *
+ctnetlink_create_conntrack(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nlattr * const cda[],
+ struct nf_conntrack_tuple *otuple,
+ struct nf_conntrack_tuple *rtuple,
+ u8 u3)
+{
+ struct nf_conn *ct;
+ int err = -EINVAL;
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_tstamp *tstamp;
+ u64 timeout;
+
+ ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
+ if (IS_ERR(ct))
+ return ERR_PTR(-ENOMEM);
+
+ if (!cda[CTA_TIMEOUT])
+ goto err1;
+
+ rcu_read_lock();
+ if (cda[CTA_HELP]) {
+ char *helpname = NULL;
+ struct nlattr *helpinfo = NULL;
+
+ err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo);
+ if (err < 0)
+ goto err2;
+
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL) {
+ rcu_read_unlock();
+#ifdef CONFIG_MODULES
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ err = -EOPNOTSUPP;
+ goto err1;
+ }
+
+ rcu_read_lock();
+ helper = __nf_conntrack_helper_find(helpname,
+ nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper) {
+ err = -EAGAIN;
+ goto err2;
+ }
+ rcu_read_unlock();
+#endif
+ err = -EOPNOTSUPP;
+ goto err1;
+ } else {
+ struct nf_conn_help *help;
+
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ /* set private helper data if allowed. */
+ if (helper->from_nlattr)
+ helper->from_nlattr(helpinfo, ct);
+
+ /* disable helper auto-assignment for this entry */
+ ct->status |= IPS_HELPER;
+ RCU_INIT_POINTER(help->helper, helper);
+ }
+ }
+
+ err = ctnetlink_setup_nat(ct, cda);
+ if (err < 0)
+ goto err2;
+
+ nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+ nf_ct_labels_ext_add(ct);
+ nfct_seqadj_ext_add(ct);
+ nfct_synproxy_ext_add(ct);
+
+	/* we must add conntrack extensions before confirmation;
+	 * ct->ext must not be reallocated once the entry is confirmed.
+	 */
+ ct->status |= IPS_CONFIRMED;
+
+ timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ __nf_ct_set_timeout(ct, timeout);
+
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0)
+ goto err2;
+ }
+
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
+ if (err < 0)
+ goto err2;
+ }
+
+ memset(&ct->proto, 0, sizeof(ct->proto));
+ if (cda[CTA_PROTOINFO]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ goto err2;
+ }
+
+ if (cda[CTA_SYNPROXY]) {
+ err = ctnetlink_change_synproxy(ct, cda);
+ if (err < 0)
+ goto err2;
+ }
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK])
+ ctnetlink_change_mark(ct, cda);
+#endif
+
+ /* setup master conntrack: this is a confirmed expectation */
+ if (cda[CTA_TUPLE_MASTER]) {
+ struct nf_conntrack_tuple master;
+ struct nf_conntrack_tuple_hash *master_h;
+ struct nf_conn *master_ct;
+
+ err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER,
+ u3, NULL);
+ if (err < 0)
+ goto err2;
+
+ master_h = nf_conntrack_find_get(net, zone, &master);
+ if (master_h == NULL) {
+ err = -ENOENT;
+ goto err2;
+ }
+ master_ct = nf_ct_tuplehash_to_ctrack(master_h);
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ ct->master = master_ct;
+ }
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
+
+ err = nf_conntrack_hash_check_insert(ct);
+ if (err < 0)
+ goto err3;
+
+ rcu_read_unlock();
+
+ return ct;
+
+err3:
+ if (ct->master)
+ nf_ct_put(ct->master);
+err2:
+ rcu_read_unlock();
+err1:
+ nf_conntrack_free(ct);
+ return ERR_PTR(err);
+}
+
+static int ctnetlink_new_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conntrack_tuple_hash *h = NULL;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
+ struct nf_conntrack_zone zone;
+ struct nf_conn *ct;
+ int err;
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_TUPLE_ORIG]) {
+ err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG,
+ u3, &zone);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_TUPLE_REPLY]) {
+ err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY,
+ u3, &zone);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_TUPLE_ORIG])
+ h = nf_conntrack_find_get(info->net, &zone, &otuple);
+ else if (cda[CTA_TUPLE_REPLY])
+ h = nf_conntrack_find_get(info->net, &zone, &rtuple);
+
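+	/* A lookup miss may create a new entry (NLM_F_CREATE); a hit is an
+	 * update, unless NLM_F_EXCL demands exclusive creation.
+	 */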
+ if (h == NULL) {
+ err = -ENOENT;
+ if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
+ enum ip_conntrack_events events;
+
+ if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
+ return -EINVAL;
+ if (otuple.dst.protonum != rtuple.dst.protonum)
+ return -EINVAL;
+
+ ct = ctnetlink_create_conntrack(info->net, &zone, cda,
+ &otuple, &rtuple, u3);
+ if (IS_ERR(ct))
+ return PTR_ERR(ct);
+
+ err = 0;
+ if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+ events = 1 << IPCT_RELATED;
+ else
+ events = 1 << IPCT_NEW;
+
+ if (cda[CTA_LABELS] &&
+ ctnetlink_attach_labels(ct, cda) == 0)
+ events |= (1 << IPCT_LABEL);
+
+ nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+ (1 << IPCT_ASSURED) |
+ (1 << IPCT_HELPER) |
+ (1 << IPCT_PROTOINFO) |
+ (1 << IPCT_SEQADJ) |
+ (1 << IPCT_MARK) |
+ (1 << IPCT_SYNPROXY) |
+ events,
+ ct, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ nf_ct_put(ct);
+ }
+
+ return err;
+ }
+ /* implicit 'else' */
+
+ err = -EEXIST;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
+ err = ctnetlink_change_conntrack(ct, cda);
+ if (err == 0) {
+ nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+ (1 << IPCT_ASSURED) |
+ (1 << IPCT_HELPER) |
+ (1 << IPCT_LABEL) |
+ (1 << IPCT_PROTOINFO) |
+ (1 << IPCT_SEQADJ) |
+ (1 << IPCT_MARK) |
+ (1 << IPCT_SYNPROXY),
+ ct, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ }
+ }
+
+ nf_ct_put(ct);
+ return err;
+}
+
+static int
+ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
+ __u16 cpu, const struct ip_conntrack_stat *st)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_CT_GET_STATS_CPU);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
+ nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
+ nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
+ nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
+ htonl(st->insert_failed)) ||
+ nla_put_be32(skb, CTA_STATS_DROP, htonl(st->drop)) ||
+ nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) ||
+ nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) ||
+ nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
+ htonl(st->search_restart)) ||
+ nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
+ htonl(st->clash_resolve)) ||
+ nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
+ htonl(st->chaintoolong)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int cpu;
+ struct net *net = sock_net(skb->sk);
+
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ const struct ip_conntrack_stat *st;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ st = per_cpu_ptr(net->ct.stat, cpu);
+ if (ctnetlink_ct_stat_cpu_fill_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ cpu, st) < 0)
+ break;
+ }
+ cb->args[0] = cpu;
+
+ return skb->len;
+}
+
+static int ctnetlink_stat_ct_cpu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_ct_stat_cpu_dump,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ return 0;
+}
+
+static int
+ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ struct net *net)
+{
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
+ unsigned int nr_conntracks;
+ struct nlmsghdr *nlh;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ nr_conntracks = nf_conntrack_count(net);
+ if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ sock_net(skb->sk));
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+}
+
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+ [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
+ [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_EXPECT_ID] = { .type = NLA_U32 },
+ [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
+ [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
+ [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask);
+
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
+static size_t
+ctnetlink_glue_build_size(const struct nf_conn *ct)
+{
+ return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+ + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ + nla_total_size(0) /* CTA_PROTOINFO */
+ + nla_total_size(0) /* CTA_HELP */
+ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ + ctnetlink_secctx_size(ct)
+ + ctnetlink_acct_size(ct)
+ + ctnetlink_timestamp_size(ct)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
+#endif
+ + ctnetlink_proto_size(ct)
+ ;
+}
+
+static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
+{
+ const struct nf_conntrack_zone *zone;
+ struct nlattr *nest_parms;
+
+ zone = nf_ct_zone(ct);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_id(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_status(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_timeout(skb, ct, false) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_helpinfo(skb, ct) < 0)
+ goto nla_put_failure;
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (ct->secmark && ctnetlink_dump_secctx(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+ if (ct->master && ctnetlink_dump_master(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if ((ct->status & IPS_SEQ_ADJUST) &&
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ goto nla_put_failure;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (ctnetlink_dump_mark(skb, ct, true) < 0)
+ goto nla_put_failure;
+#endif
+ if (ctnetlink_dump_labels(skb, ct) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int
+ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ u_int16_t ct_attr, u_int16_t ct_info_attr)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, ct_attr);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (__ctnetlink_glue_build(skb, ct) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int
+ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
+{
+ unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
+ unsigned long d = ct->status ^ status;
+
+ if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+		/* SEEN_REPLY bit can only be set, never cleared */
+ return -EBUSY;
+
+ if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+		/* ASSURED bit can only be set, never cleared */
+ return -EBUSY;
+
+ /* This check is less strict than ctnetlink_change_status()
+ * because callers often flip IPS_EXPECTED bits when sending
+ * an NFQA_CT attribute to the kernel. So ignore the
+ * unchangeable bits but do not error out. Also user programs
+ * are allowed to clear the bits that they are allowed to change.
+ */
+ __nf_ct_change_status(ct, status, ~status);
+ return 0;
+}
+
+static int
+ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
+{
+ int err;
+
+ if (cda[CTA_TIMEOUT]) {
+ err = ctnetlink_change_timeout(ct, cda);
+ if (err < 0)
+ return err;
+ }
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_update_status(ct, cda);
+ if (err < 0)
+ return err;
+ }
+ if (cda[CTA_HELP]) {
+ err = ctnetlink_change_helper(ct, cda);
+ if (err < 0)
+ return err;
+ }
+ if (cda[CTA_LABELS]) {
+ err = ctnetlink_attach_labels(ct, cda);
+ if (err < 0)
+ return err;
+ }
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK]) {
+ ctnetlink_change_mark(ct, cda);
+ }
+#endif
+ return 0;
+}
+
+static int
+ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
+{
+ struct nlattr *cda[CTA_MAX+1];
+ int ret;
+
+ ret = nla_parse_nested_deprecated(cda, CTA_MAX, attr, ct_nla_policy,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
+}
+
+static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
+{
+ int err;
+
+ err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+ nf_ct_l3num(ct), NULL);
+ if (err < 0)
+ return err;
+
+ return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+ nf_ct_l3num(ct), NULL);
+}
+
+static int
+ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report)
+{
+ struct nlattr *cda[CTA_EXPECT_MAX+1];
+ struct nf_conntrack_tuple tuple, mask;
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conntrack_expect *exp;
+ int err;
+
+ err = nla_parse_nested_deprecated(cda, CTA_EXPECT_MAX, attr,
+ exp_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL)
+ return -EOPNOTSUPP;
+ }
+
+ exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+ helper, &tuple, &mask);
+ if (IS_ERR(exp))
+ return PTR_ERR(exp);
+
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
+ nf_ct_expect_put(exp);
+ return err;
+}
+
+static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, int diff)
+{
+ if (!(ct->status & IPS_NAT_MASK))
+ return;
+
+ nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
+}
+
+static const struct nfnl_ct_hook ctnetlink_glue_hook = {
+ .build_size = ctnetlink_glue_build_size,
+ .build = ctnetlink_glue_build,
+ .parse = ctnetlink_glue_parse,
+ .attach_expect = ctnetlink_glue_attach_expect,
+ .seq_adjust = ctnetlink_glue_seqadj,
+};
+#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */
+
+/***********************************************************************
+ * EXPECT
+ ***********************************************************************/
+
+static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple,
+ u32 type)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, type);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, tuple) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple_mask *mask)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nf_conntrack_tuple m;
+ struct nlattr *nest_parms;
+ int ret;
+
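+	/* An expectation mask is a partial tuple: start from an all-ones
+	 * tuple and fill in only the fields the mask constrains.
+	 */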
+ memset(&m, 0xFF, sizeof(m));
+ memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
+ m.src.u.all = mask->src.u.all;
+ m.src.l3num = tuple->src.l3num;
+ m.dst.protonum = tuple->dst.protonum;
+
+ nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ ret = ctnetlink_dump_tuples_ip(skb, &m);
+ if (ret >= 0) {
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
+ ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
+ }
+ rcu_read_unlock();
+
+ if (unlikely(ret < 0))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static const union nf_inet_addr any_addr;
+#endif
+
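+/* Expectation ids reported to userspace are siphashes over the object
+ * pointers and the expect tuple, giving a stable identifier without
+ * leaking kernel addresses.
+ */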
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+ static siphash_aligned_key_t exp_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+ a = (unsigned long)exp;
+ b = (unsigned long)exp->helper;
+ c = (unsigned long)exp->master;
+ d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+ return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+ return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
+
+static int
+ctnetlink_exp_dump_expect(struct sk_buff *skb,
+ const struct nf_conntrack_expect *exp)
+{
+ struct nf_conn *master = exp->master;
+ long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
+ struct nf_conn_help *help;
+#if IS_ENABLED(CONFIG_NF_NAT)
+ struct nlattr *nest_parms;
+ struct nf_conntrack_tuple nat_tuple = {};
+#endif
+ struct nf_ct_helper_expectfn *expfn;
+
+ if (timeout < 0)
+ timeout = 0;
+
+ if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
+ goto nla_put_failure;
+ if (ctnetlink_exp_dump_tuple(skb,
+ &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ CTA_EXPECT_MASTER) < 0)
+ goto nla_put_failure;
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
+ exp->saved_proto.all) {
+ nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir)))
+ goto nla_put_failure;
+
+ nat_tuple.src.l3num = nf_ct_l3num(master);
+ nat_tuple.src.u3 = exp->saved_addr;
+ nat_tuple.dst.protonum = nf_ct_protonum(master);
+ nat_tuple.src.u = exp->saved_proto;
+
+ if (ctnetlink_exp_dump_tuple(skb, &nat_tuple,
+ CTA_EXPECT_NAT_TUPLE) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+ }
+#endif
+ if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
+ nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
+ nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
+ nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
+ goto nla_put_failure;
+ help = nfct_help(master);
+ if (help) {
+ struct nf_conntrack_helper *helper;
+
+ helper = rcu_dereference(help->helper);
+ if (helper &&
+ nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
+ goto nla_put_failure;
+ }
+ expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
+ if (expfn != NULL &&
+ nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, const struct nf_conntrack_expect *exp)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int
+ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
+{
+ struct nf_conntrack_expect *exp = item->exp;
+ struct net *net = nf_ct_exp_net(exp);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ unsigned int type, group;
+ int flags = 0;
+
+ if (events & (1 << IPEXP_DESTROY)) {
+ type = IPCTNL_MSG_EXP_DELETE;
+ group = NFNLGRP_CONNTRACK_EXP_DESTROY;
+ } else if (events & (1 << IPEXP_NEW)) {
+ type = IPCTNL_MSG_EXP_NEW;
+ flags = NLM_F_CREATE|NLM_F_EXCL;
+ group = NFNLGRP_CONNTRACK_EXP_NEW;
+ } else
+ return 0;
+
+ if (!item->report && !nfnetlink_has_listeners(net, group))
+ return 0;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+ kfree_skb(skb);
+errout:
+ nfnetlink_set_err(net, 0, 0, -ENOBUFS);
+ return 0;
+}
+#endif
+
+static int ctnetlink_exp_done(struct netlink_callback *cb)
+{
+ if (cb->args[1])
+ nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]);
+ return 0;
+}
+
+static int
+ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nf_conntrack_expect *exp, *last;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u_int8_t l3proto = nfmsg->nfgen_family;
+
+ rcu_read_lock();
+ last = (struct nf_conntrack_expect *)cb->args[1];
+ for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
+restart:
+ hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]],
+ hnode) {
+ if (l3proto && exp->tuple.src.l3num != l3proto)
+ continue;
+
+ if (!net_eq(nf_ct_net(exp->master), net))
+ continue;
+
+ if (cb->args[1]) {
+ if (exp != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (ctnetlink_exp_fill_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ IPCTNL_MSG_EXP_NEW,
+ exp) < 0) {
+ if (!refcount_inc_not_zero(&exp->use))
+ continue;
+ cb->args[1] = (unsigned long)exp;
+ goto out;
+ }
+ }
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ }
+out:
+ rcu_read_unlock();
+ if (last)
+ nf_ct_expect_put(last);
+
+ return skb->len;
+}
+
+static int
+ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_conntrack_expect *exp, *last;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nf_conn *ct = cb->data;
+ struct nf_conn_help *help = nfct_help(ct);
+ u_int8_t l3proto = nfmsg->nfgen_family;
+
+ if (cb->args[0])
+ return 0;
+
+ rcu_read_lock();
+ last = (struct nf_conntrack_expect *)cb->args[1];
+restart:
+ hlist_for_each_entry_rcu(exp, &help->expectations, lnode) {
+ if (l3proto && exp->tuple.src.l3num != l3proto)
+ continue;
+ if (cb->args[1]) {
+ if (exp != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ IPCTNL_MSG_EXP_NEW,
+ exp) < 0) {
+ if (!refcount_inc_not_zero(&exp->use))
+ continue;
+ cb->args[1] = (unsigned long)exp;
+ goto out;
+ }
+ }
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ cb->args[0] = 1;
+out:
+ rcu_read_unlock();
+ if (last)
+ nf_ct_expect_put(last);
+
+ return skb->len;
+}
+
+static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
+{
+ int err;
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct nf_conntrack_zone zone;
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_ct_dump_table,
+ .done = ctnetlink_exp_done,
+ };
+
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ h = nf_conntrack_find_get(net, &zone, &tuple);
+ if (!h)
+ return -ENOENT;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+	/* No helper extension, hence no expectations are linked to
+	 * this conntrack entry.
+	 */
+ if (!nfct_help(ct)) {
+ nf_ct_put(ct);
+ return 0;
+ }
+
+ c.data = ct;
+
+ err = netlink_dump_start(ctnl, skb, nlh, &c);
+ nf_ct_put(ct);
+
+ return err;
+}
+
+static int ctnetlink_get_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ u_int8_t u3 = info->nfmsg->nfgen_family;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_zone zone;
+ struct sk_buff *skb2;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (cda[CTA_EXPECT_MASTER])
+ return ctnetlink_dump_exp_ct(info->net, info->sk, skb,
+ info->nlh, cda,
+ info->extack);
+ else {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_dump_table,
+ .done = ctnetlink_exp_done,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+ }
+
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_TUPLE])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
+ else if (cda[CTA_EXPECT_MASTER])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
+ else
+ return -EINVAL;
+
+ if (err < 0)
+ return err;
+
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+ if (!exp)
+ return -ENOENT;
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+ if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
+ return -ENOENT;
+ }
+ }
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2) {
+ nf_ct_expect_put(exp);
+ return -ENOMEM;
+ }
+
+ rcu_read_lock();
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ exp);
+ rcu_read_unlock();
+ nf_ct_expect_put(exp);
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+}
+
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *m_help;
+ const char *name = data;
+
+ m_help = nfct_help(exp->master);
+
+ helper = rcu_dereference(m_help->helper);
+ if (!helper)
+ return false;
+
+ return strcmp(helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
+static int ctnetlink_del_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ u_int8_t u3 = info->nfmsg->nfgen_family;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone zone;
+ int err;
+
+ if (cda[CTA_EXPECT_TUPLE]) {
+ /* delete a single expect by tuple */
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
+ if (err < 0)
+ return err;
+
+ /* bump usage count to 2 */
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
+ if (!exp)
+ return -ENOENT;
+
+ if (cda[CTA_EXPECT_ID]) {
+			__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+
+			/* compare against the same id that dumps report */
+			if (id != nf_expect_get_id(exp)) {
+ nf_ct_expect_put(exp);
+ return -ENOENT;
+ }
+ }
+
+ /* after list removal, usage count == 1 */
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ nf_ct_expect_put(exp);
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ /* have to put what we 'get' above.
+ * after this line usage count == 0 */
+ nf_ct_expect_put(exp);
+ } else if (cda[CTA_EXPECT_HELP_NAME]) {
+ char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ nf_ct_expect_iterate_net(info->net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ } else {
+		/* neither tuple nor helper name given: flush everything */
+ nf_ct_expect_iterate_net(info->net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ }
+
+ return 0;
+}
+
+static int
+ctnetlink_change_expect(struct nf_conntrack_expect *x,
+ const struct nlattr * const cda[])
+{
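+	/* If the timeout timer already fired, the expectation is on its
+	 * way out; don't resurrect it, report -ETIME instead.
+	 */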
+ if (cda[CTA_EXPECT_TIMEOUT]) {
+ if (!del_timer(&x->timeout))
+ return -ETIME;
+
+ x->timeout.expires = jiffies +
+ ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
+ add_timer(&x->timeout);
+ }
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
+ [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED },
+};
+#endif
+
+static int
+ctnetlink_parse_expect_nat(const struct nlattr *attr,
+ struct nf_conntrack_expect *exp,
+ u_int8_t u3)
+{
+#if IS_ENABLED(CONFIG_NF_NAT)
+ struct nlattr *tb[CTA_EXPECT_NAT_MAX+1];
+ struct nf_conntrack_tuple nat_tuple = {};
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, CTA_EXPECT_NAT_MAX, attr,
+ exp_nat_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE])
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple((const struct nlattr * const *)tb,
+ &nat_tuple, CTA_EXPECT_NAT_TUPLE,
+ u3, NULL);
+ if (err < 0)
+ return err;
+
+ exp->saved_addr = nat_tuple.src.u3;
+ exp->saved_proto = nat_tuple.src.u;
+ exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR]));
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
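+/* Build an expectation from netlink attributes. On success the caller
+ * owns a reference and must drop it with nf_ct_expect_put().
+ */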
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
+{
+ u_int32_t class = 0;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn_help *help;
+ int err;
+
+ help = nfct_help(ct);
+ if (!help)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (cda[CTA_EXPECT_CLASS] && helper) {
+ class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
+ if (class > helper->expect_class_max)
+ return ERR_PTR(-EINVAL);
+ }
+ exp = nf_ct_expect_alloc(ct);
+ if (!exp)
+ return ERR_PTR(-ENOMEM);
+
+ if (cda[CTA_EXPECT_FLAGS]) {
+ exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
+ exp->flags &= ~NF_CT_EXPECT_USERSPACE;
+ } else {
+ exp->flags = 0;
+ }
+ if (cda[CTA_EXPECT_FN]) {
+ const char *name = nla_data(cda[CTA_EXPECT_FN]);
+ struct nf_ct_helper_expectfn *expfn;
+
+ expfn = nf_ct_helper_expectfn_find_by_name(name);
+ if (expfn == NULL) {
+ err = -EINVAL;
+ goto err_out;
+ }
+ exp->expectfn = expfn->expectfn;
+ } else
+ exp->expectfn = NULL;
+
+ exp->class = class;
+ exp->master = ct;
+ exp->helper = helper;
+ exp->tuple = *tuple;
+ exp->mask.src.u3 = mask->src.u3;
+ exp->mask.src.u.all = mask->src.u.all;
+
+ if (cda[CTA_EXPECT_NAT]) {
+ err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
+ exp, nf_ct_l3num(ct));
+ if (err < 0)
+ goto err_out;
+ }
+ return exp;
+err_out:
+ nf_ct_expect_put(exp);
+ return ERR_PTR(err);
+}
+
+static int
+ctnetlink_create_expect(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nlattr * const cda[],
+ u_int8_t u3, u32 portid, int report)
+{
+ struct nf_conntrack_tuple tuple, mask, master_tuple;
+ struct nf_conntrack_tuple_hash *h = NULL;
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn *ct;
+ int err;
+
+ /* caller guarantees that those three CTA_EXPECT_* exist */
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK,
+ u3, NULL);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
+ if (err < 0)
+ return err;
+
+ /* Look for master conntrack of this expectation */
+ h = nf_conntrack_find_get(net, zone, &master_tuple);
+ if (!h)
+ return -ENOENT;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ rcu_read_lock();
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, u3,
+ nf_ct_protonum(ct));
+ if (helper == NULL) {
+ rcu_read_unlock();
+#ifdef CONFIG_MODULES
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ err = -EOPNOTSUPP;
+ goto err_ct;
+ }
+ rcu_read_lock();
+ helper = __nf_conntrack_helper_find(helpname, u3,
+ nf_ct_protonum(ct));
+ if (helper) {
+ err = -EAGAIN;
+ goto err_rcu;
+ }
+ rcu_read_unlock();
+#endif
+ err = -EOPNOTSUPP;
+ goto err_ct;
+ }
+ }
+
+ exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+ if (IS_ERR(exp)) {
+ err = PTR_ERR(exp);
+ goto err_rcu;
+ }
+
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
+ nf_ct_expect_put(exp);
+err_rcu:
+ rcu_read_unlock();
+err_ct:
+ nf_ct_put(ct);
+ return err;
+}
+
+static int ctnetlink_new_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ u_int8_t u3 = info->nfmsg->nfgen_family;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_zone zone;
+ int err;
+
+	if (!cda[CTA_EXPECT_TUPLE] ||
+	    !cda[CTA_EXPECT_MASK] ||
+	    !cda[CTA_EXPECT_MASTER])
+		return -EINVAL;
+
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
+ if (err < 0)
+ return err;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ exp = __nf_ct_expect_find(info->net, &zone, &tuple);
+ if (!exp) {
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ err = -ENOENT;
+ if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
+ err = ctnetlink_create_expect(info->net, &zone, cda, u3,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
+ }
+ return err;
+ }
+
+ err = -EEXIST;
+ if (!(info->nlh->nlmsg_flags & NLM_F_EXCL))
+ err = ctnetlink_change_expect(exp, cda);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+
+ return err;
+}
+
+static int
+ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
+ const struct ip_conntrack_stat *st)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_EXP_GET_STATS_CPU);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) ||
+ nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) ||
+ nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int cpu;
+ struct net *net = sock_net(skb->sk);
+
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ const struct ip_conntrack_stat *st;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ st = per_cpu_ptr(net->ct.stat, cpu);
+ if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ cpu, st) < 0)
+ break;
+ }
+ cb->args[0] = cpu;
+
+ return skb->len;
+}
+
+static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_stat_cpu_dump,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static struct nf_ct_event_notifier ctnl_notifier = {
+ .ct_event = ctnetlink_conntrack_event,
+ .exp_event = ctnetlink_expect_event,
+};
+#endif
+
+static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
+ [IPCTNL_MSG_CT_NEW] = {
+ .call = ctnetlink_new_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET] = {
+ .call = ctnetlink_get_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_DELETE] = {
+ .call = ctnetlink_del_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET_CTRZERO] = {
+ .call = ctnetlink_get_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET_STATS_CPU] = {
+ .call = ctnetlink_stat_ct_cpu,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_STATS] = {
+ .call = ctnetlink_stat_ct,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_DYING] = {
+ .call = ctnetlink_get_ct_dying,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_UNCONFIRMED] = {
+ .call = ctnetlink_get_ct_unconfirmed,
+ .type = NFNL_CB_MUTEX,
+ },
+};
+
+static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
+ [IPCTNL_MSG_EXP_GET] = {
+ .call = ctnetlink_get_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_NEW] = {
+ .call = ctnetlink_new_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_DELETE] = {
+ .call = ctnetlink_del_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_GET_STATS_CPU] = {
+ .call = ctnetlink_stat_exp_cpu,
+ .type = NFNL_CB_MUTEX,
+ },
+};
+
+static const struct nfnetlink_subsystem ctnl_subsys = {
+ .name = "conntrack",
+ .subsys_id = NFNL_SUBSYS_CTNETLINK,
+ .cb_count = IPCTNL_MSG_MAX,
+ .cb = ctnl_cb,
+};
+
+static const struct nfnetlink_subsystem ctnl_exp_subsys = {
+ .name = "conntrack_expect",
+ .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
+ .cb_count = IPCTNL_MSG_EXP_MAX,
+ .cb = ctnl_exp_cb,
+};
+
+MODULE_ALIAS("ip_conntrack_netlink");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
+
+static int __net_init ctnetlink_net_init(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ nf_conntrack_register_notifier(net, &ctnl_notifier);
+#endif
+ return 0;
+}
+
+static void ctnetlink_net_pre_exit(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ nf_conntrack_unregister_notifier(net);
+#endif
+}
+
+static struct pernet_operations ctnetlink_net_ops = {
+ .init = ctnetlink_net_init,
+ .pre_exit = ctnetlink_net_pre_exit,
+};
+
+static int __init ctnetlink_init(void)
+{
+ int ret;
+
+ BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx));
+
+ ret = nfnetlink_subsys_register(&ctnl_subsys);
+ if (ret < 0) {
+ pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
+ goto err_out;
+ }
+
+ ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
+ if (ret < 0) {
+ pr_err("ctnetlink_init: cannot register exp with nfnetlink.\n");
+ goto err_unreg_subsys;
+ }
+
+ ret = register_pernet_subsys(&ctnetlink_net_ops);
+ if (ret < 0) {
+ pr_err("ctnetlink_init: cannot register pernet operations\n");
+ goto err_unreg_exp_subsys;
+ }
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
+ /* setup interaction between nf_queue and nf_conntrack_netlink. */
+ RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook);
+#endif
+ return 0;
+
+err_unreg_exp_subsys:
+ nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+err_unreg_subsys:
+ nfnetlink_subsys_unregister(&ctnl_subsys);
+err_out:
+ return ret;
+}
+
+static void __exit ctnetlink_exit(void)
+{
+ unregister_pernet_subsys(&ctnetlink_net_ops);
+ nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+ nfnetlink_subsys_unregister(&ctnl_subsys);
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
+ RCU_INIT_POINTER(nfnl_ct_hook, NULL);
+#endif
+ synchronize_rcu();
+}
+
+module_init(ctnetlink_init);
+module_exit(ctnetlink_exit);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
new file mode 100644
index 000000000..4c679638d
--- /dev/null
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637.
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * Limitations:
+ * - We blindly assume that control connections are always
+ * established in PNS->PAC direction. This is a violation
+ * of RFC 2637
+ * - We can only support a single call within each session
+ * TODO:
+ * - testing of incoming PPTP calls
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+MODULE_ALIAS("ip_conntrack_pptp");
+MODULE_ALIAS_NFCT_HELPER("pptp");
+
+static DEFINE_SPINLOCK(nf_pptp_lock);
+
+const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook);
+
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+/* PptpControlMessageType names */
+static const char *const pptp_msg_name_array[PPTP_MSG_MAX + 1] = {
+ [0] = "UNKNOWN_MESSAGE",
+ [PPTP_START_SESSION_REQUEST] = "START_SESSION_REQUEST",
+ [PPTP_START_SESSION_REPLY] = "START_SESSION_REPLY",
+ [PPTP_STOP_SESSION_REQUEST] = "STOP_SESSION_REQUEST",
+ [PPTP_STOP_SESSION_REPLY] = "STOP_SESSION_REPLY",
+ [PPTP_ECHO_REQUEST] = "ECHO_REQUEST",
+ [PPTP_ECHO_REPLY] = "ECHO_REPLY",
+ [PPTP_OUT_CALL_REQUEST] = "OUT_CALL_REQUEST",
+ [PPTP_OUT_CALL_REPLY] = "OUT_CALL_REPLY",
+ [PPTP_IN_CALL_REQUEST] = "IN_CALL_REQUEST",
+ [PPTP_IN_CALL_REPLY] = "IN_CALL_REPLY",
+ [PPTP_IN_CALL_CONNECT] = "IN_CALL_CONNECT",
+ [PPTP_CALL_CLEAR_REQUEST] = "CALL_CLEAR_REQUEST",
+ [PPTP_CALL_DISCONNECT_NOTIFY] = "CALL_DISCONNECT_NOTIFY",
+ [PPTP_WAN_ERROR_NOTIFY] = "WAN_ERROR_NOTIFY",
+ [PPTP_SET_LINK_INFO] = "SET_LINK_INFO"
+};
+
+const char *pptp_msg_name(u_int16_t msg)
+{
+ if (msg > PPTP_MSG_MAX)
+ return pptp_msg_name_array[0];
+
+ return pptp_msg_name_array[msg];
+}
+EXPORT_SYMBOL(pptp_msg_name);
+#endif
+
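+/* readability helpers: "(10 MINS)" expands to (10 * 60 * HZ) jiffies */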
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+#define PPTP_GRE_TIMEOUT (10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
+
+static void pptp_expectfn(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ const struct nf_nat_pptp_hook *hook;
+ struct net *net = nf_ct_net(ct);
+ pr_debug("increasing timeouts\n");
+
+ /* increase timeout of GRE data channel conntrack entry */
+ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+ ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+ /* Can you see how rusty this code is, compared with the pre-2.6.11
+ * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->master->status & IPS_NAT_MASK)
+ hook->expectfn(ct, exp);
+ else {
+ struct nf_conntrack_tuple inv_t;
+ struct nf_conntrack_expect *exp_other;
+
+ /* obviously this tuple inversion only works until you do NAT */
+ nf_ct_invert_tuple(&inv_t, &exp->tuple);
+ pr_debug("trying to unexpect other dir: ");
+ nf_ct_dump_tuple(&inv_t);
+
+ exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t);
+ if (exp_other) {
+ /* delete other expectation. */
+ pr_debug("found\n");
+ nf_ct_unexpect_related(exp_other);
+ nf_ct_expect_put(exp_other);
+ } else {
+ pr_debug("not found\n");
+ }
+ }
+}
+
+static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
+ const struct nf_conntrack_tuple *t)
+{
+ const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn *sibling;
+
+ pr_debug("trying to timeout ct or exp for tuple ");
+ nf_ct_dump_tuple(t);
+
+ zone = nf_ct_zone(ct);
+ h = nf_conntrack_find_get(net, zone, t);
+ if (h) {
+ sibling = nf_ct_tuplehash_to_ctrack(h);
+ pr_debug("setting timeout of conntrack %p to 0\n", sibling);
+ sibling->proto.gre.timeout = 0;
+ sibling->proto.gre.stream_timeout = 0;
+ nf_ct_kill(sibling);
+ nf_ct_put(sibling);
+ return 1;
+ } else {
+ exp = nf_ct_expect_find_get(net, zone, t);
+ if (exp) {
+ pr_debug("unexpect_related of expect %p\n", exp);
+ nf_ct_unexpect_related(exp);
+ nf_ct_expect_put(exp);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* timeout GRE data connections */
+static void pptp_destroy_siblings(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
+ struct nf_conntrack_tuple t;
+
+ nf_ct_gre_keymap_destroy(ct);
+
+ /* try original (pns->pac) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = ct_pptp_info->pns_call_id;
+ t.dst.u.gre.key = ct_pptp_info->pac_call_id;
+ if (!destroy_sibling_or_exp(net, ct, &t))
+ pr_debug("failed to timeout original pns->pac ct/exp\n");
+
+ /* try reply (pac->pns) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = ct_pptp_info->pac_call_id;
+ t.dst.u.gre.key = ct_pptp_info->pns_call_id;
+ if (!destroy_sibling_or_exp(net, ct, &t))
+ pr_debug("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
+static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
+{
+ struct nf_conntrack_expect *exp_orig, *exp_reply;
+ const struct nf_nat_pptp_hook *hook;
+ enum ip_conntrack_dir dir;
+ int ret = 1;
+
+ exp_orig = nf_ct_expect_alloc(ct);
+ if (exp_orig == NULL)
+ goto out;
+
+ exp_reply = nf_ct_expect_alloc(ct);
+ if (exp_reply == NULL)
+ goto out_put_orig;
+
+ /* original direction, PNS->PAC */
+ dir = IP_CT_DIR_ORIGINAL;
+ nf_ct_expect_init(exp_orig, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &peer_callid, &callid);
+ exp_orig->expectfn = pptp_expectfn;
+
+ /* reply direction, PAC->PNS */
+ dir = IP_CT_DIR_REPLY;
+ nf_ct_expect_init(exp_reply, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &callid, &peer_callid);
+ exp_reply->expectfn = pptp_expectfn;
+
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ hook->exp_gre(exp_orig, exp_reply);
+ if (nf_ct_expect_related(exp_orig, 0) != 0)
+ goto out_put_both;
+ if (nf_ct_expect_related(exp_reply, 0) != 0)
+ goto out_unexpect_orig;
+
+ /* Add GRE keymap entries */
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0)
+ goto out_unexpect_both;
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) {
+ nf_ct_gre_keymap_destroy(ct);
+ goto out_unexpect_both;
+ }
+ ret = 0;
+
+out_put_both:
+ nf_ct_expect_put(exp_reply);
+out_put_orig:
+ nf_ct_expect_put(exp_orig);
+out:
+ return ret;
+
+out_unexpect_both:
+ nf_ct_unexpect_related(exp_reply);
+out_unexpect_orig:
+ nf_ct_unexpect_related(exp_orig);
+ goto out_put_both;
+}
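+/* Data-level sketch of what exp_gre() just registered, using its own
+ * parameter names: for PNS call ID "peer_callid" and PAC call ID
+ * "callid", the two expectations carry the GRE keys swapped:
+ *
+ *	original (PNS->PAC): src key = peer_callid, dst key = callid
+ *	reply	 (PAC->PNS): src key = callid,	    dst key = peer_callid
+ *
+ * A GRE packet matching either tuple is picked up as RELATED and runs
+ * pptp_expectfn().
+ */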
+
+static int
+pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
+ u_int16_t msg;
+ __be16 cid = 0, pcid = 0;
+
+ msg = ntohs(ctlh->messageType);
+ pr_debug("inbound control message %s\n", pptp_msg_name(msg));
+
+ switch (msg) {
+ case PPTP_START_SESSION_REPLY:
+ /* server confirms new control session */
+ if (info->sstate < PPTP_SESSION_REQUESTED)
+ goto invalid;
+ if (pptpReq->srep.resultCode == PPTP_START_OK)
+ info->sstate = PPTP_SESSION_CONFIRMED;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_STOP_SESSION_REPLY:
+ /* server confirms end of control session */
+ if (info->sstate > PPTP_SESSION_STOPREQ)
+ goto invalid;
+ if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+ info->sstate = PPTP_SESSION_NONE;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_OUT_CALL_REPLY:
+ /* server accepted call, we now expect GRE frames */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_OUT_REQ &&
+ info->cstate != PPTP_CALL_OUT_CONF)
+ goto invalid;
+
+ cid = pptpReq->ocack.callID;
+ pcid = pptpReq->ocack.peersCallID;
+ if (info->pns_call_id != pcid)
+ goto invalid;
+ pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name(msg),
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+ info->cstate = PPTP_CALL_OUT_CONF;
+ info->pac_call_id = cid;
+ exp_gre(ct, cid, pcid);
+ } else
+ info->cstate = PPTP_CALL_NONE;
+ break;
+
+ case PPTP_IN_CALL_REQUEST:
+ /* server tells us about incoming call request */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+
+ cid = pptpReq->icreq.callID;
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
+ info->cstate = PPTP_CALL_IN_REQ;
+ info->pac_call_id = cid;
+ break;
+
+ case PPTP_IN_CALL_CONNECT:
+ /* server tells us an incoming call has been established */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_IN_REP &&
+ info->cstate != PPTP_CALL_IN_CONF)
+ goto invalid;
+
+ pcid = pptpReq->iccon.peersCallID;
+ cid = info->pac_call_id;
+
+ if (info->pns_call_id != pcid)
+ goto invalid;
+
+ pr_debug("%s, PCID=%X\n", pptp_msg_name(msg), ntohs(pcid));
+ info->cstate = PPTP_CALL_IN_CONF;
+
+ /* we expect a GRE connection from PAC to PNS */
+ exp_gre(ct, cid, pcid);
+ break;
+
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ /* server confirms disconnect */
+ cid = pptpReq->disc.callID;
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
+ info->cstate = PPTP_CALL_NONE;
+
+ /* untrack this call id, unexpect GRE packets */
+ pptp_destroy_siblings(ct);
+ break;
+
+ case PPTP_WAN_ERROR_NOTIFY:
+ case PPTP_SET_LINK_INFO:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->inbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
+ return NF_ACCEPT;
+
+invalid:
+ pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ pptp_msg_name(msg),
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
+}
+
+static int
+pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
+ u_int16_t msg;
+ __be16 cid = 0, pcid = 0;
+
+ msg = ntohs(ctlh->messageType);
+ pr_debug("outbound control message %s\n", pptp_msg_name(msg));
+
+ switch (msg) {
+ case PPTP_START_SESSION_REQUEST:
+ /* client requests a new control session */
+ if (info->sstate != PPTP_SESSION_NONE)
+ goto invalid;
+ info->sstate = PPTP_SESSION_REQUESTED;
+ break;
+
+ case PPTP_STOP_SESSION_REQUEST:
+ /* client requests end of control session */
+ info->sstate = PPTP_SESSION_STOPREQ;
+ break;
+
+ case PPTP_OUT_CALL_REQUEST:
+ /* client initiating connection to server */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ info->cstate = PPTP_CALL_OUT_REQ;
+ /* track PNS call id */
+ cid = pptpReq->ocreq.callID;
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
+ info->pns_call_id = cid;
+ break;
+
+ case PPTP_IN_CALL_REPLY:
+ /* client answers incoming call */
+ if (info->cstate != PPTP_CALL_IN_REQ &&
+ info->cstate != PPTP_CALL_IN_REP)
+ goto invalid;
+
+ cid = pptpReq->icack.callID;
+ pcid = pptpReq->icack.peersCallID;
+ if (info->pac_call_id != pcid)
+ goto invalid;
+ pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name(msg),
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+ /* part two of the three-way handshake */
+ info->cstate = PPTP_CALL_IN_REP;
+ info->pns_call_id = cid;
+ } else
+ info->cstate = PPTP_CALL_NONE;
+ break;
+
+ case PPTP_CALL_CLEAR_REQUEST:
+ /* client requests hangup of call */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ /* FUTURE: iterate over all calls and check whether the
+ * call ID is valid. We don't do this without newnat,
+ * because we only know about the last call. */
+ info->cstate = PPTP_CALL_CLEAR_REQ;
+ break;
+
+ case PPTP_SET_LINK_INFO:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->outbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
+ return NF_ACCEPT;
+
+invalid:
+ pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ pptp_msg_name(msg),
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
+}
+
+static const unsigned int pptp_msg_size[] = {
+ [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
+ [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
+ [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
+ [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
+ [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
+ [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
+ [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
+ [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
+ [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
+ [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
+ [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+ [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
+ [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
+};
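+/* How this table is consumed (mirrors the length check in
+ * conntrack_pptp_help() below): a control message of type "msg" must
+ * carry at least pptp_msg_size[msg] bytes of payload, i.e.
+ *
+ *	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ *		return NF_ACCEPT;	// too short to parse safely
+ */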
+
+/* track caller id inside control connection, call expect_related */
+static int
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ const struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct tcphdr *tcph;
+ struct tcphdr _tcph;
+ const struct pptp_pkt_hdr *pptph;
+ struct pptp_pkt_hdr _pptph;
+ struct PptpControlHeader _ctlh, *ctlh;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ unsigned int tcplen = skb->len - protoff;
+ unsigned int datalen, reqlen, nexthdr_off;
+ int oldsstate, oldcstate;
+ int ret;
+ u_int16_t msg;
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (!nf_ct_is_confirmed(ct) && (ct->status & IPS_NAT_MASK)) {
+ struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+ if (!nat && !nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC))
+ return NF_DROP;
+ }
+#endif
+ /* don't do any tracking before the TCP handshake is complete */
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+ nexthdr_off = protoff;
+ tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
+ if (!tcph)
+ return NF_ACCEPT;
+
+ nexthdr_off += tcph->doff * 4;
+ datalen = tcplen - tcph->doff * 4;
+
+ pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
+ if (!pptph) {
+ pr_debug("no full PPTP header, can't track\n");
+ return NF_ACCEPT;
+ }
+ nexthdr_off += sizeof(_pptph);
+ datalen -= sizeof(_pptph);
+
+ /* if it's not a control message we can't do anything with it */
+ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+ ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+ pr_debug("not a control packet\n");
+ return NF_ACCEPT;
+ }
+
+ ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ msg = ntohs(ctlh->messageType);
+ if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ return NF_ACCEPT;
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+
+ pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
+ oldsstate = info->sstate;
+ oldcstate = info->cstate;
+
+ spin_lock_bh(&nf_pptp_lock);
+
+ /* FIXME: We just blindly assume that the control connection is always
+ * established from PNS->PAC. However, the RFC makes no such guarantee. */
+ if (dir == IP_CT_DIR_ORIGINAL)
+ /* client -> server (PNS -> PAC) */
+ ret = pptp_outbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ else
+ /* server -> client (PAC -> PNS) */
+ ret = pptp_inbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ pr_debug("sstate: %d->%d, cstate: %d->%d\n",
+ oldsstate, info->sstate, oldcstate, info->cstate);
+ spin_unlock_bh(&nf_pptp_lock);
+
+ return ret;
+}
+
+static const struct nf_conntrack_expect_policy pptp_exp_policy = {
+ .max_expected = 2,
+ .timeout = 5 * 60,
+};
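+/* Note: max_expected is 2 because exp_gre() registers exactly one
+ * expectation per direction for the GRE data channel; .timeout is the
+ * expectation lifetime in seconds (here five minutes).
+ */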
+
+/* control protocol helper */
+static struct nf_conntrack_helper pptp __read_mostly = {
+ .name = "pptp",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .help = conntrack_pptp_help,
+ .destroy = pptp_destroy_siblings,
+ .expect_policy = &pptp_exp_policy,
+};
+
+static int __init nf_conntrack_pptp_init(void)
+{
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master));
+
+ return nf_conntrack_helper_register(&pptp);
+}
+
+static void __exit nf_conntrack_pptp_fini(void)
+{
+ nf_conntrack_helper_unregister(&pptp);
+}
+
+module_init(nf_conntrack_pptp_init);
+module_exit(nf_conntrack_pptp_fini);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
new file mode 100644
index 000000000..895b09cbd
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+#include <net/netfilter/nf_log.h>
+
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+static DEFINE_MUTEX(nf_ct_proto_mutex);
+
+#ifdef CONFIG_SYSCTL
+__printf(4, 5)
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ u8 protonum,
+ const char *fmt, ...)
+{
+ struct net *net = state->net;
+ struct va_format vaf;
+ va_list args;
+
+ if (net->ct.sysctl_log_invalid != protonum &&
+ net->ct.sysctl_log_invalid != IPPROTO_RAW)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
+ NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
+
+__printf(4, 5)
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const struct nf_hook_state *state,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ struct net *net;
+ va_list args;
+
+ net = nf_ct_net(ct);
+ if (likely(net->ct.sysctl_log_invalid == 0))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_l4proto_log_invalid(skb, state,
+ nf_ct_protonum(ct), "%pV", &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
+#endif
+
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
+{
+ switch (l4proto) {
+ case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
+ case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
+ case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
+#endif /* CONFIG_IPV6 */
+ }
+
+ return &nf_conntrack_l4proto_generic;
+};
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
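+/* Usage sketch: the returned tracker is statically allocated, so no
+ * reference is taken, e.g.
+ *
+ *	const struct nf_conntrack_l4proto *p = nf_ct_l4proto_find(IPPROTO_TCP);
+ *	// p == &nf_conntrack_l4proto_tcp; an unknown protocol number
+ *	// falls back to &nf_conntrack_l4proto_generic
+ */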
+
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ const struct nf_conn_help *help;
+
+ help = nfct_help(ct);
+ if (help) {
+ const struct nf_conntrack_helper *helper;
+ int ret;
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+ if (helper) {
+ ret = helper->help(skb, protoff, ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ }
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
+}
+EXPORT_SYMBOL_GPL(nf_confirm);
+
+static bool in_vrf_postrouting(const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (state->hook == NF_INET_POST_ROUTING &&
+ netif_is_l3_master(state->out))
+ return true;
+#endif
+ return false;
+}
+
+static unsigned int ipv4_confirm(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return nf_conntrack_confirm(skb);
+
+ if (in_vrf_postrouting(state))
+ return NF_ACCEPT;
+
+ return nf_confirm(skb,
+ skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
+}
+
+static unsigned int ipv4_conntrack_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(skb, state);
+}
+
+static unsigned int ipv4_conntrack_local(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *tmpl;
+
+ tmpl = nf_ct_get(skb, &ctinfo);
+ if (tmpl && nf_ct_is_template(tmpl)) {
+ /* when skipping ct, clear templates to avoid fooling
+ * later targets/matches
+ */
+ skb->_nfct = 0;
+ nf_ct_put(tmpl);
+ }
+ return NF_ACCEPT;
+ }
+
+ return nf_conntrack_in(skb, state);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ * make it the first hook.
+ */
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
+ {
+ .hook = ipv4_conntrack_in,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_conntrack_local,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_confirm,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
+ .hook = ipv4_confirm,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+};
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ * blame them).
+ * Reversing the socket's dst/src point of view gives us the reply
+ * mapping.
+ */
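+/* Userspace sketch (hypothetical descriptor "s" of a REDIRECTed TCP
+ * connection):
+ *
+ *	struct sockaddr_in dst;
+ *	socklen_t len = sizeof(dst);
+ *
+ *	if (getsockopt(s, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
+ *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
+ *		       ntohs(dst.sin_port));
+ */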
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple, 0, sizeof(tuple));
+
+ lock_sock(sk);
+ tuple.src.u3.ip = inet->inet_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.ip = inet->inet_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.src.l3num = PF_INET;
+ tuple.dst.protonum = sk->sk_protocol;
+ release_sock(sk);
+
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
+ pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+ if (h) {
+ struct sockaddr_in sin;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u3.ip;
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+ pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+ &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+ nf_ct_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+ &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST + 1,
+ .get = getorigdst,
+ .owner = THIS_MODULE,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+ const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct sockaddr_in6 sin6;
+ struct nf_conn *ct;
+ __be32 flow_label;
+ int bound_dev_if;
+
+ lock_sock(sk);
+ tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.in6 = sk->sk_v6_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.dst.protonum = sk->sk_protocol;
+ bound_dev_if = sk->sk_bound_dev_if;
+ flow_label = inet6->flow_label;
+ release_sock(sk);
+
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP)
+ return -ENOPROTOOPT;
+
+ if (*len < 0 || (unsigned int)*len < sizeof(sin6))
+ return -EINVAL;
+
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+ if (!h) {
+ pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+ &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+ }
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
+ memcpy(&sin6.sin6_addr,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+ sizeof(sin6.sin6_addr));
+
+ nf_ct_put(ct);
+ sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
+ return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+ .pf = NFPROTO_IPV6,
+ .get_optmin = IP6T_SO_ORIGINAL_DST,
+ .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
+ .get = ipv6_getorigdst,
+ .owner = THIS_MODULE,
+};
+
+static unsigned int ipv6_confirm(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+ int protoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return nf_conntrack_confirm(skb);
+
+ if (in_vrf_postrouting(state))
+ return NF_ACCEPT;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ return nf_conntrack_confirm(skb);
+ }
+
+ return nf_confirm(skb, protoff, ct, ctinfo);
+}
+
+static unsigned int ipv6_conntrack_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(skb, state);
+}
+
+static unsigned int ipv6_conntrack_local(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(skb, state);
+}
+
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
+ {
+ .hook = ipv6_conntrack_in,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_conntrack_local,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_confirm,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_LAST,
+ },
+ {
+ .hook = ipv6_confirm,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_LAST - 1,
+ },
+};
+#endif
+
+static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
+{
+ u8 nfproto = (unsigned long)_nfproto;
+
+ if (nf_ct_l3num(ct) != nfproto)
+ return 0;
+
+ if (nf_ct_protonum(ct) == IPPROTO_TCP &&
+ ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
+ ct->proto.tcp.seen[0].td_maxwin = 0;
+ ct->proto.tcp.seen[1].td_maxwin = 0;
+ }
+
+ return 0;
+}
+
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ bool fixup_needed = false, retry = true;
+ int err = 0;
+retry:
+ mutex_lock(&nf_ct_proto_mutex);
+
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ cnet->users4++;
+ if (cnet->users4 > 1)
+ goto out_unlock;
+ err = nf_defrag_ipv4_enable(net);
+ if (err) {
+ cnet->users4 = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ if (err)
+ cnet->users4 = 0;
+ else
+ fixup_needed = true;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ cnet->users6++;
+ if (cnet->users6 > 1)
+ goto out_unlock;
+ err = nf_defrag_ipv6_enable(net);
+ if (err < 0) {
+ cnet->users6 = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ if (err)
+ cnet->users6 = 0;
+ else
+ fixup_needed = true;
+ break;
+#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info) {
+ if (!retry) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ mutex_unlock(&nf_ct_proto_mutex);
+ request_module("nf_conntrack_bridge");
+ retry = false;
+ goto retry;
+ }
+ if (!try_module_get(nf_ct_bridge_info->me)) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ cnet->users_bridge++;
+ if (cnet->users_bridge > 1)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+ if (err)
+ cnet->users_bridge = 0;
+ else
+ fixup_needed = true;
+ break;
+ default:
+ err = -EPROTO;
+ break;
+ }
+ out_unlock:
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ if (fixup_needed) {
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = (void *)(unsigned long)nfproto,
+ };
+ nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter_data);
+ }
+
+ return err;
+}
+
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ mutex_lock(&nf_ct_proto_mutex);
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ if (cnet->users4 && (--cnet->users4 == 0)) {
+ nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ nf_defrag_ipv4_disable(net);
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ if (cnet->users6 && (--cnet->users6 == 0)) {
+ nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ nf_defrag_ipv6_disable(net);
+ }
+ break;
+#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info)
+ break;
+ if (cnet->users_bridge && (--cnet->users_bridge == 0))
+ nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+
+ module_put(nf_ct_bridge_info->me);
+ break;
+ }
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+
+static int nf_ct_netns_inet_get(struct net *net)
+{
+ int err;
+
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+
+ return err;
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+#endif
+ return err;
+}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ switch (nfproto) {
+ case NFPROTO_INET:
+ err = nf_ct_netns_inet_get(net);
+ break;
+ case NFPROTO_BRIDGE:
+ err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
+ if (err < 0)
+ return err;
+
+ err = nf_ct_netns_inet_get(net);
+ if (err < 0) {
+ nf_ct_netns_put(net, NFPROTO_BRIDGE);
+ return err;
+ }
+ break;
+ default:
+ err = nf_ct_netns_do_get(net, nfproto);
+ break;
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ switch (nfproto) {
+ case NFPROTO_BRIDGE:
+ nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+ fallthrough;
+ case NFPROTO_INET:
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ break;
+ default:
+ nf_ct_netns_do_put(net, nfproto);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
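+/* Pairing sketch for the two entry points above (hypothetical caller,
+ * e.g. a match/target that needs conntrack in this netns):
+ *
+ *	err = nf_ct_netns_get(net, NFPROTO_INET);	// hooks registered
+ *	if (err < 0)
+ *		return err;
+ *	...
+ *	nf_ct_netns_put(net, NFPROTO_INET);		// hooks dropped again
+ *
+ * get/put calls must balance per netns and per family.
+ */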
+
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = info;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(!nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = NULL;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
+
+int nf_conntrack_proto_init(void)
+{
+ int ret;
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret < 0)
+ return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = nf_register_sockopt(&so_getorigdst6);
+ if (ret < 0)
+ goto cleanup_sockopt;
+#endif
+
+ return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst);
+#endif
+ return ret;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+ nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+ nf_unregister_sockopt(&so_getorigdst6);
+#endif
+}
+
+void nf_conntrack_proto_pernet_init(struct net *net)
+{
+ nf_conntrack_generic_init_net(net);
+ nf_conntrack_udp_init_net(net);
+ nf_conntrack_tcp_init_net(net);
+ nf_conntrack_icmp_init_net(net);
+#if IS_ENABLED(CONFIG_IPV6)
+ nf_conntrack_icmpv6_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ nf_conntrack_dccp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ nf_conntrack_sctp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ nf_conntrack_gre_init_net(net);
+#endif
+}
+
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("ip_conntrack");
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
new file mode 100644
index 000000000..d4fd626d2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -0,0 +1,824 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DCCP connection tracking protocol helper
+ *
+ * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/dccp.h>
+#include <linux/slab.h>
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_log.h>
+
+/* Timeouts are based on values from RFC4340:
+ *
+ * - REQUEST:
+ *
+ * 8.1.2. Client Request
+ *
+ * A client MAY give up on its DCCP-Requests after some time
+ * (3 minutes, for example).
+ *
+ * - RESPOND:
+ *
+ * 8.1.3. Server Response
+ *
+ * It MAY also leave the RESPOND state for CLOSED after a timeout of
+ * not less than 4MSL (8 minutes);
+ *
+ * - PARTOPEN:
+ *
+ * 8.1.5. Handshake Completion
+ *
+ * If the client remains in PARTOPEN for more than 4MSL (8 minutes),
+ * it SHOULD reset the connection with Reset Code 2, "Aborted".
+ *
+ * - OPEN:
+ *
+ * The DCCP timestamp overflows after 11.9 hours. If the connection
+ * stays idle this long the sequence number won't be recognized
+ * as valid anymore.
+ *
+ * - CLOSEREQ/CLOSING:
+ *
+ * 8.3. Termination
+ *
+ * The retransmission timer should initially be set to go off in two
+ * round-trip times and should back off to not less than once every
+ * 64 seconds ...
+ *
+ * - TIMEWAIT:
+ *
+ * 4.3. States
+ *
+ * A server or client socket remains in this state for 2MSL (4 minutes)
+ * after the connection has been torn down, ...
+ */
+
+#define DCCP_MSL (2 * 60 * HZ)
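+/* With DCCP_MSL == 2 minutes, the defaults installed by
+ * nf_conntrack_dccp_init_net() below work out to:
+ *
+ *	REQUEST			2 * DCCP_MSL	= 4 minutes
+ *	RESPOND, PARTOPEN	4 * DCCP_MSL	= 8 minutes (4MSL)
+ *	OPEN			12 * 3600 * HZ	= 12 hours
+ *	CLOSEREQ, CLOSING	64 * HZ		= 64 seconds
+ *	TIMEWAIT		2 * DCCP_MSL	= 4 minutes (2MSL)
+ */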
+
+static const char * const dccp_state_names[] = {
+ [CT_DCCP_NONE] = "NONE",
+ [CT_DCCP_REQUEST] = "REQUEST",
+ [CT_DCCP_RESPOND] = "RESPOND",
+ [CT_DCCP_PARTOPEN] = "PARTOPEN",
+ [CT_DCCP_OPEN] = "OPEN",
+ [CT_DCCP_CLOSEREQ] = "CLOSEREQ",
+ [CT_DCCP_CLOSING] = "CLOSING",
+ [CT_DCCP_TIMEWAIT] = "TIMEWAIT",
+ [CT_DCCP_IGNORE] = "IGNORE",
+ [CT_DCCP_INVALID] = "INVALID",
+};
+
+#define sNO CT_DCCP_NONE
+#define sRQ CT_DCCP_REQUEST
+#define sRS CT_DCCP_RESPOND
+#define sPO CT_DCCP_PARTOPEN
+#define sOP CT_DCCP_OPEN
+#define sCR CT_DCCP_CLOSEREQ
+#define sCG CT_DCCP_CLOSING
+#define sTW CT_DCCP_TIMEWAIT
+#define sIG CT_DCCP_IGNORE
+#define sIV CT_DCCP_INVALID
+
+/*
+ * DCCP state transition table
+ *
+ * The assumption is the same as for TCP tracking:
+ *
+ * We are the man in the middle. All the packets go through us but might
+ * get lost in transit to the destination. It is assumed that the destination
+ * can't receive segments we haven't seen.
+ *
+ * The following states exist:
+ *
+ * NONE: Initial state, expecting Request
+ * REQUEST: Request seen, waiting for Response from server
+ * RESPOND: Response from server seen, waiting for Ack from client
+ * PARTOPEN: Ack after Response seen, waiting for packet other than Response,
+ * Reset or Sync from server
+ * OPEN: Packet other than Response, Reset or Sync seen
+ * CLOSEREQ: CloseReq from server seen, expecting Close from client
+ * CLOSING: Close seen, expecting Reset
+ * TIMEWAIT: Reset seen
+ * IGNORE: Not determinable whether packet is valid
+ *
+ * Some states exist only on one side of the connection: REQUEST, RESPOND,
+ * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to
+ * the one it was in before.
+ *
+ * Packets are marked as ignored (sIG) if we don't know if they're valid
+ * (for example a reincarnation of a connection we didn't notice had
+ * already died) and the server may send back a connection-closing Reset or a
+ * Response. They're also used for Sync/SyncAck packets, which we don't
+ * care about.
+ */
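+/* Indexing example for the table below: the first Request seen from
+ * the client while no connection is known opens the tracking, i.e.
+ *
+ *	dccp_state_table[CT_DCCP_ROLE_CLIENT]
+ *			[DCCP_PKT_REQUEST]
+ *			[CT_DCCP_NONE] == CT_DCCP_REQUEST
+ */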
+static const u_int8_t
+dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = {
+ [CT_DCCP_ROLE_CLIENT] = {
+ [DCCP_PKT_REQUEST] = {
+ /*
+ * sNO -> sRQ Regular Request
+ * sRQ -> sRQ Retransmitted Request or reincarnation
+ * sRS -> sRS Retransmitted Request (apparently Response
+ * got lost after we saw it) or reincarnation
+ * sPO -> sIG Ignore, conntrack might be out of sync
+ * sOP -> sIG Ignore, conntrack might be out of sync
+ * sCR -> sIG Ignore, conntrack might be out of sync
+ * sCG -> sIG Ignore, conntrack might be out of sync
+ * sTW -> sRQ Reincarnation
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ,
+ },
+ [DCCP_PKT_RESPONSE] = {
+ /*
+ * sNO -> sIV Invalid
+ * sRQ -> sIG Ignore, might be response to ignored Request
+ * sRS -> sIG Ignore, might be response to ignored Request
+ * sPO -> sIG Ignore, might be response to ignored Request
+ * sOP -> sIG Ignore, might be response to ignored Request
+ * sCR -> sIG Ignore, might be response to ignored Request
+ * sCG -> sIG Ignore, might be response to ignored Request
+ * sTW -> sIV Invalid, reincarnation in reverse direction
+ * goes through sRQ
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV,
+ },
+ [DCCP_PKT_ACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
+ * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN
+ * sOP -> sOP Regular ACK, remain in OPEN
+ * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
+ },
+ [DCCP_PKT_DATA] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.)
+ * sOP -> sOP Regular Data packet
+ * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV,
+ },
+ [DCCP_PKT_DATAACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
+ * sPO -> sPO Remain in PARTOPEN state
+ * sOP -> sOP Regular DataAck packet in OPEN state
+ * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
+ },
+ [DCCP_PKT_CLOSEREQ] = {
+ /*
+ * CLOSEREQ may only be sent by the server.
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV
+ },
+ [DCCP_PKT_CLOSE] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sCG Client-initiated close
+ * sOP -> sCG Client-initiated close
+ * sCR -> sCG Close in response to CloseReq (8.3.)
+ * sCG -> sCG Retransmit
+ * sTW -> sIV Late retransmit, already in TIME_WAIT
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV
+ },
+ [DCCP_PKT_RESET] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.)
+ * sRS -> sTW Response received without Request
+ * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.)
+ * sOP -> sTW Connection reset
+ * sCR -> sTW Connection reset
+ * sCG -> sTW Connection reset
+ * sTW -> sIG Ignore (don't refresh timer)
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG
+ },
+ [DCCP_PKT_SYNC] = {
+ /*
+ * We currently ignore Sync packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ [DCCP_PKT_SYNCACK] = {
+ /*
+ * We currently ignore SyncAck packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ },
+ [CT_DCCP_ROLE_SERVER] = {
+ [DCCP_PKT_REQUEST] = {
+ /*
+ * sNO -> sIV Invalid
+ * sRQ -> sIG Ignore, conntrack might be out of sync
+ * sRS -> sIG Ignore, conntrack might be out of sync
+ * sPO -> sIG Ignore, conntrack might be out of sync
+ * sOP -> sIG Ignore, conntrack might be out of sync
+ * sCR -> sIG Ignore, conntrack might be out of sync
+ * sCG -> sIG Ignore, conntrack might be out of sync
+ * sTW -> sRQ Reincarnation, must reverse roles
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ
+ },
+ [DCCP_PKT_RESPONSE] = {
+ /*
+ * sNO -> sIV Response without Request
+ * sRQ -> sRS Response to the client's Request
+ * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT)
+ * sPO -> sIG Response to an ignored Request or late retransmit
+ * sOP -> sIG Ignore, might be response to ignored Request
+ * sCR -> sIG Ignore, might be response to ignored Request
+ * sCG -> sIG Ignore, might be response to ignored Request
+ * sTW -> sIV Invalid, Request from client in sTW moves to sRQ
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV
+ },
+ [DCCP_PKT_ACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular Ack in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_DATA] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular Data packet in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_DATAACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular DataAck in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_CLOSEREQ] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.)
+ * sOP -> sCR CloseReq in OPEN state
+ * sCR -> sCR Retransmit
+ * sCG -> sCR Simultaneous close, client sends another Close
+ * sTW -> sIV Already closed
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV
+ },
+ [DCCP_PKT_CLOSE] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP -> sCG Move directly to CLOSING
+ * sOP -> sCG Move to CLOSING
+ * sCR -> sIV Close after CloseReq is invalid
+ * sCG -> sCG Retransmit
+ * sTW -> sIV Already closed
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV
+ },
+ [DCCP_PKT_RESET] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sTW Reset in response to Request
+ * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.)
+ * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.)
+ * sOP -> sTW
+ * sCR -> sTW
+ * sCG -> sTW
+ * sTW -> sIG Ignore (don't refresh timer)
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */
+ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG
+ },
+ [DCCP_PKT_SYNC] = {
+ /*
+ * We currently ignore Sync packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ [DCCP_PKT_SYNCACK] = {
+ /*
+ * We currently ignore SyncAck packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ },
+};
+
+static noinline bool
+dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ const struct dccp_hdr *dh,
+ const struct nf_hook_state *hook_state)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_dccp_net *dn;
+ const char *msg;
+ u_int8_t state;
+
+ state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
+ switch (state) {
+ default:
+ dn = nf_dccp_pernet(net);
+ if (dn->dccp_loose == 0) {
+ msg = "not picking up existing connection ";
+ goto out_invalid;
+ }
+ break;
+ case CT_DCCP_REQUEST:
+ break;
+ case CT_DCCP_INVALID:
+ msg = "invalid state transition ";
+ goto out_invalid;
+ }
+
+ ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
+ ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
+ ct->proto.dccp.state = CT_DCCP_NONE;
+ ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+ ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+ ct->proto.dccp.handshake_seq = 0;
+ return true;
+
+out_invalid:
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
+ return false;
+}
+
+static u64 dccp_ack_seq(const struct dccp_hdr *dh)
+{
+ const struct dccp_hdr_ack_bits *dhack;
+
+ dhack = (void *)dh + __dccp_basic_hdr_len(dh);
+ return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) +
+ ntohl(dhack->dccph_ack_nr_low);
+}
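+/* Example values: dccph_ack_nr_high == htons(0x0001) and
+ * dccph_ack_nr_low == htonl(0x00000002) reassemble to the 48-bit
+ * ack number ((u64)0x0001 << 32) + 0x2 == 0x100000002.
+ */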
+
+static bool dccp_error(const struct dccp_hdr *dh,
+ struct sk_buff *skb, unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST |
+ 1 << DCCP_PKT_RESPONSE |
+ 1 << DCCP_PKT_CLOSEREQ |
+ 1 << DCCP_PKT_CLOSE |
+ 1 << DCCP_PKT_RESET |
+ 1 << DCCP_PKT_SYNC |
+ 1 << DCCP_PKT_SYNCACK;
+ unsigned int dccp_len = skb->len - dataoff;
+ unsigned int cscov;
+ const char *msg;
+ u8 type;
+
+ BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG);
+
+ if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+ dh->dccph_doff * 4 > dccp_len) {
+ msg = "nf_ct_dccp: truncated/malformed packet ";
+ goto out_invalid;
+ }
+
+ cscov = dccp_len;
+ if (dh->dccph_cscov) {
+ cscov = (dh->dccph_cscov - 1) * 4;
+ if (cscov > dccp_len) {
+ msg = "nf_ct_dccp: bad checksum coverage ";
+ goto out_invalid;
+ }
+ }
+
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum_partial(skb, state->hook, dataoff, cscov,
+ IPPROTO_DCCP, state->pf)) {
+ msg = "nf_ct_dccp: bad checksum ";
+ goto out_invalid;
+ }
+
+ type = dh->dccph_type;
+ if (type >= DCCP_PKT_INVALID) {
+ msg = "nf_ct_dccp: reserved packet type ";
+ goto out_invalid;
+ }
+
+ if (test_bit(type, &require_seq48) && !dh->dccph_x) {
+ msg = "nf_ct_dccp: type lacks 48bit sequence numbers";
+ goto out_invalid;
+ }
+
+ return false;
+out_invalid:
+ nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
+ return true;
+}
+
+struct nf_conntrack_dccp_buf {
+ struct dccp_hdr dh; /* generic header part */
+ struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */
+ union { /* depends on header type */
+ struct dccp_hdr_ack_bits ack;
+ struct dccp_hdr_request req;
+ struct dccp_hdr_response response;
+ struct dccp_hdr_reset rst;
+ } u;
+};
+
+static struct dccp_hdr *
+dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh,
+ struct nf_conntrack_dccp_buf *buf)
+{
+ unsigned int hdrlen = __dccp_hdr_len(dh);
+
+ if (hdrlen > sizeof(*buf))
+ return NULL;
+
+ return skb_header_pointer(skb, offset, hdrlen, buf);
+}
+
+int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_conntrack_dccp_buf _dh;
+ u_int8_t type, old_state, new_state;
+ enum ct_dccp_roles role;
+ unsigned int *timeouts;
+ struct dccp_hdr *dh;
+
+ dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
+ if (!dh)
+ return NF_DROP;
+
+ if (dccp_error(dh, skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ /* pull again, including possible 48-bit sequence numbers and the subtype header */
+ dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
+ if (!dh)
+ return NF_DROP;
+
+ type = dh->dccph_type;
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
+ return -NF_ACCEPT;
+
+ if (type == DCCP_PKT_RESET &&
+ !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ /* Tear down connection immediately if only reply is a RESET */
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_ACCEPT;
+ }
+
+ spin_lock_bh(&ct->lock);
+
+ role = ct->proto.dccp.role[dir];
+ old_state = ct->proto.dccp.state;
+ new_state = dccp_state_table[role][type][old_state];
+
+ switch (new_state) {
+ case CT_DCCP_REQUEST:
+ if (old_state == CT_DCCP_TIMEWAIT &&
+ role == CT_DCCP_ROLE_SERVER) {
+ /* Reincarnation in the reverse direction: reopen and
+ * reverse client/server roles. */
+ ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT;
+ ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER;
+ }
+ break;
+ case CT_DCCP_RESPOND:
+ if (old_state == CT_DCCP_REQUEST)
+ ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
+ break;
+ case CT_DCCP_PARTOPEN:
+ if (old_state == CT_DCCP_RESPOND &&
+ type == DCCP_PKT_ACK &&
+ dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq)
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ break;
+ case CT_DCCP_IGNORE:
+ /*
+ * Connection tracking might be out of sync, so we ignore
+ * packets that might establish a new connection and resync
+ * if the server responds with a valid Response.
+ */
+ if (ct->proto.dccp.last_dir == !dir &&
+ ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST &&
+ type == DCCP_PKT_RESPONSE) {
+ ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT;
+ ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER;
+ ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
+ new_state = CT_DCCP_RESPOND;
+ break;
+ }
+ ct->proto.dccp.last_dir = dir;
+ ct->proto.dccp.last_pkt = type;
+
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
+ return NF_ACCEPT;
+ case CT_DCCP_INVALID:
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
+ return -NF_ACCEPT;
+ }
+
+ ct->proto.dccp.last_dir = dir;
+ ct->proto.dccp.last_pkt = type;
+ ct->proto.dccp.state = new_state;
+ spin_unlock_bh(&ct->lock);
+
+ if (new_state != old_state)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
+
+ return NF_ACCEPT;
+}
+
+static bool dccp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.dccp.state) {
+ case CT_DCCP_CLOSEREQ:
+ case CT_DCCP_CLOSING:
+ case CT_DCCP_TIMEWAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+{
+ seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+ struct nf_conn *ct, bool destroy)
+{
+ struct nlattr *nest_parms;
+
+ spin_lock_bh(&ct->lock);
+ nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state))
+ goto nla_put_failure;
+
+ if (destroy)
+ goto skip_state;
+
+ if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE,
+ ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) ||
+ nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
+ cpu_to_be64(ct->proto.dccp.handshake_seq),
+ CTA_PROTOINFO_DCCP_PAD))
+ goto nla_put_failure;
+skip_state:
+ nla_nest_end(skb, nest_parms);
+ spin_unlock_bh(&ct->lock);
+
+ return 0;
+
+nla_put_failure:
+ spin_unlock_bh(&ct->lock);
+ return -1;
+}
+
+static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
+ [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
+ [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC },
+};
+
+#define DCCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
+ NLA_ALIGN(NLA_HDRLEN + 0))
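+/* Worked out: NLA_HDRLEN is 4 and NLA_ALIGN rounds up to a multiple
+ * of 4, so this is 8 (state) + 8 (role) + 12 (handshake_seq) +
+ * 4 (pad) = 32 bytes.
+ */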
+
+static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
+{
+ struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
+ struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1];
+ int err;
+
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr,
+ dccp_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
+ !tb[CTA_PROTOINFO_DCCP_ROLE] ||
+ nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
+ nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&ct->lock);
+ ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
+ if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
+ ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
+ ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
+ } else {
+ ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
+ ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
+ }
+ if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
+ ct->proto.dccp.handshake_seq =
+ be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
+ }
+ spin_unlock_bh(&ct->lock);
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
+ unsigned int *timeouts = data;
+ int i;
+
+ if (!timeouts)
+ timeouts = dn->dccp_timeout;
+
+ /* set default DCCP timeouts. */
+ for (i = 0; i < CT_DCCP_MAX; i++)
+ timeouts[i] = dn->dccp_timeout[i];
+
+ /* there's a 1:1 mapping between attributes and protocol states. */
+ for (i = CTA_TIMEOUT_DCCP_UNSPEC + 1; i < CTA_TIMEOUT_DCCP_MAX + 1; i++) {
+ if (tb[i])
+ timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
+ }
+
+ timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST];
+ return 0;
+}
+
+static int
+dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeouts = data;
+ int i;
+
+ for (i = CTA_TIMEOUT_DCCP_UNSPEC + 1; i < CTA_TIMEOUT_DCCP_MAX + 1; i++) {
+ if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
+ goto nla_put_failure;
+ }
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
+ [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_dccp_init_net(struct net *net)
+{
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
+
+ /* default values */
+ dn->dccp_loose = 1;
+ dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
+ dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
+ dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
+ dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
+
+ /* timeouts[0] is unused, make it the same as REQUEST so
+ * ->timeouts[0] contains the 'new' timeout, like udp or icmp.
+ */
+ dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
+ .l4proto = IPPROTO_DCCP,
+ .can_early_drop = dccp_can_early_drop,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ .print_conntrack = dccp_print_conntrack,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = DCCP_NLATTR_SIZE,
+ .to_nlattr = dccp_to_nlattr,
+ .from_nlattr = nlattr_to_dccp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
+ .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
+ .nla_policy = dccp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
new file mode 100644
index 000000000..e831637bc
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+
+static const unsigned int nf_ct_generic_timeout = 600*HZ;
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ struct nf_generic_net *gn = nf_generic_pernet(net);
+ unsigned int *timeout = data;
+
+ if (!timeout)
+ timeout = &gn->timeout;
+
+ if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ;
+ } else {
+ /* Set default generic timeout. */
+ *timeout = gn->timeout;
+ }
+
+ return 0;
+}
+
+static int
+generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
+ [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_generic_init_net(struct net *net)
+{
+ struct nf_generic_net *gn = nf_generic_pernet(net);
+
+ gn->timeout = nf_ct_generic_timeout;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
+{
+ .l4proto = 255,
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = generic_timeout_nlattr_to_obj,
+ .obj_to_nlattr = generic_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_GENERIC_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = generic_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
new file mode 100644
index 000000000..728eeb0ae
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * well suited for NAT, as it has no protocol-specific part such as port
+ * numbers.
+ *
+ * It has an optional key field, which may help us distinguish two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784.
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves the same purpose as the key field
+ * in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637.
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+static const unsigned int gre_timeouts[GRE_CT_MAX] = {
+ [GRE_CT_UNREPLIED] = 30*HZ,
+ [GRE_CT_REPLIED] = 180*HZ,
+};
+
+/* used when expectation is added */
+static DEFINE_SPINLOCK(keymap_lock);
+
+static inline struct nf_gre_net *gre_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.gre;
+}
+
+static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
+ const struct nf_conntrack_tuple *t)
+{
+ return km->tuple.src.l3num == t->src.l3num &&
+ !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) &&
+ !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) &&
+ km->tuple.dst.protonum == t->dst.protonum &&
+ km->tuple.dst.u.all == t->dst.u.all;
+}
+
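+/* The PPTP helper registers one keymap entry per direction, mapping a
+ * conntrack tuple to the PPTP call ID used as GRE key. gre_pkt_to_tuple()
+ * consults this list to recover the source key, which is not carried in
+ * the packet itself.
+ */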
+/* look up the source key for a given tuple */
+static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
+{
+ struct nf_gre_net *net_gre = gre_pernet(net);
+ struct nf_ct_gre_keymap *km;
+ __be16 key = 0;
+
+ list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
+ if (gre_key_cmpfn(km, t)) {
+ key = km->tuple.src.u.gre.key;
+ break;
+ }
+ }
+
+ pr_debug("lookup src key 0x%x for ", key);
+ nf_ct_dump_tuple(t);
+
+ return key;
+}
+
+/* add a single keymap entry, associate with specified master ct */
+int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
+ struct nf_conntrack_tuple *t)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_gre_net *net_gre = gre_pernet(net);
+ struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
+ struct nf_ct_gre_keymap **kmp, *km;
+
+ kmp = &ct_pptp_info->keymap[dir];
+ if (*kmp) {
+ /* check whether it's a retransmission */
+ list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
+ if (gre_key_cmpfn(km, t) && km == *kmp)
+ return 0;
+ }
+ pr_debug("trying to override keymap_%s for ct %p\n",
+ dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
+ return -EEXIST;
+ }
+
+ km = kmalloc(sizeof(*km), GFP_ATOMIC);
+ if (!km)
+ return -ENOMEM;
+ memcpy(&km->tuple, t, sizeof(*t));
+ *kmp = km;
+
+ pr_debug("adding new entry %p: ", km);
+ nf_ct_dump_tuple(&km->tuple);
+
+ spin_lock_bh(&keymap_lock);
+ list_add_tail(&km->list, &net_gre->keymap_list);
+ spin_unlock_bh(&keymap_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
+
+/* destroy the keymap entries associated with specified master ct */
+void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
+{
+ struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
+ enum ip_conntrack_dir dir;
+
+ pr_debug("entering for ct %p\n", ct);
+
+ spin_lock_bh(&keymap_lock);
+ for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
+ if (ct_pptp_info->keymap[dir]) {
+ pr_debug("removing %p from list\n",
+ ct_pptp_info->keymap[dir]);
+ list_del_rcu(&ct_pptp_info->keymap[dir]->list);
+ kfree_rcu(ct_pptp_info->keymap[dir], rcu);
+ ct_pptp_info->keymap[dir] = NULL;
+ }
+ }
+ spin_unlock_bh(&keymap_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/* gre hdr info to tuple */
+bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple)
+{
+ const struct pptp_gre_header *pgrehdr;
+ struct pptp_gre_header _pgrehdr;
+ __be16 srckey;
+ const struct gre_base_hdr *grehdr;
+ struct gre_base_hdr _grehdr;
+
+ /* first only delinearize old RFC1701 GRE header */
+ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+ if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
+ /* try to behave like "nf_conntrack_proto_generic" */
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+ return true;
+ }
+
+ /* PPTP header is variable length, we only need the fixed part up
+ * to and including the call_id field (8 bytes) */
+ pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+ if (!pgrehdr)
+ return true;
+
+ if (grehdr->protocol != GRE_PROTO_PPP) {
+ pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol));
+ return false;
+ }
+
+ tuple->dst.u.gre.key = pgrehdr->call_id;
+ srckey = gre_keymap_lookup(net, tuple);
+ tuple->src.u.gre.key = srckey;
+
+ return true;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+/* print private data for conntrack */
+static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+{
+ seq_printf(s, "timeout=%u, stream_timeout=%u ",
+ (ct->proto.gre.timeout / HZ),
+ (ct->proto.gre.stream_timeout / HZ));
+}
+#endif
+
+static unsigned int *gre_get_timeouts(struct net *net)
+{
+ return gre_pernet(net)->timeouts;
+}
+
+/* Returns verdict for packet, and may modify conntrack */
+int nf_conntrack_gre_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ if (!nf_ct_is_confirmed(ct)) {
+ unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+ if (!timeouts)
+ timeouts = gre_get_timeouts(nf_ct_net(ct));
+
+ /* initialize to sane values. Ideally a conntrack helper
+ * (e.g. in case of pptp) would increase them */
+ ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
+ ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
+ }
+
+ /* If we've seen traffic both ways, this is a GRE connection.
+ * Extend timeout. */
+ if (ct->status & IPS_SEEN_REPLY) {
+ nf_ct_refresh_acct(ct, ctinfo, skb,
+ ct->proto.gre.stream_timeout);
+ /* Also, more likely to be important, and not a probe. */
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ } else
+ nf_ct_refresh_acct(ct, ctinfo, skb,
+ ct->proto.gre.timeout);
+
+ return NF_ACCEPT;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeouts = data;
+ struct nf_gre_net *net_gre = gre_pernet(net);
+
+ if (!timeouts)
+ timeouts = gre_get_timeouts(net);
+ /* set default timeouts for GRE. */
+ timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED];
+ timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED];
+
+ if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) {
+ timeouts[GRE_CT_UNREPLIED] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ;
+ }
+ if (tb[CTA_TIMEOUT_GRE_REPLIED]) {
+ timeouts[GRE_CT_REPLIED] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ;
+ }
+ return 0;
+}
+
+static int
+gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeouts = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED,
+ htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED,
+ htonl(timeouts[GRE_CT_REPLIED] / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
+ [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_gre_init_net(struct net *net)
+{
+ struct nf_gre_net *net_gre = gre_pernet(net);
+ int i;
+
+ INIT_LIST_HEAD(&net_gre->keymap_list);
+ for (i = 0; i < GRE_CT_MAX; i++)
+ net_gre->timeouts[i] = gre_timeouts[i];
+}
+
+/* protocol helper struct */
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
+ .l4proto = IPPROTO_GRE,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ .print_conntrack = gre_print_conntrack,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = gre_timeout_nlattr_to_obj,
+ .obj_to_nlattr = gre_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_GRE_MAX,
+ .obj_size = sizeof(unsigned int) * GRE_CT_MAX,
+ .nla_policy = gre_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000..b38b7164a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+#include "nf_internals.h"
+
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
+
+bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple)
+{
+ const struct icmphdr *hp;
+ struct icmphdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
+
+ return true;
+}
+
+/* Map each type to its inverse type + 1, so 0 can mean "no mapping";
+ * unlisted slots stay 0. */
+static const u_int8_t invmap[] = {
+ [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[orig->dst.u.icmp.type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+int nf_conntrack_icmp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ unsigned int *timeout = nf_ct_timeout_lookup(ct);
+ static const u_int8_t valid_new[] = {
+ [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1
+ };
+
+ if (state->pf != NFPROTO_IPV4)
+ return -NF_ACCEPT;
+
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+ !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+ /* Can't create a new ICMP `conn' with this. */
+ pr_debug("icmp: can't create new conn with type %u\n",
+ ct->tuplehash[0].tuple.dst.u.icmp.type);
+ nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+ return -NF_ACCEPT;
+ }
+
+ if (!timeout)
+ timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+ return NF_ACCEPT;
+}
+
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state,
+ u8 l4proto, union nf_inet_addr *outer_daddr)
+{
+ struct nf_conntrack_tuple innertuple, origtuple;
+ const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_zone *zone;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conntrack_zone tmp;
+ union nf_inet_addr *ct_daddr;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
+
+ WARN_ON(skb_nfct(skb));
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuplepr(skb, dataoff,
+ state->pf, state->net, &origtuple))
+ return -NF_ACCEPT;
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple))
+ return -NF_ACCEPT;
+
+ h = nf_conntrack_find_get(state->net, zone, &innertuple);
+ if (!h)
+ return -NF_ACCEPT;
+
+ /* Consider: A -> T (=This machine) -> B
+ * Conntrack entry will look like this:
+ * Original: A->B
+ * Reply: B->T (SNAT case) OR B->A
+ *
+ * When this function runs, we got packet that looks like this:
+ * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+ *
+ * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+ * so we should expect that destination of the found connection
+ * matches outer header destination address.
+ *
+ * In above example, we can consider these two cases:
+ * 1. Error coming in reply direction from B or M (middle box) to
+ * T (SNAT case) or A.
+ * Inner saddr will be B, dst will be T or A.
+ * The found conntrack will be reply tuple (B->T/A).
+ * 2. Error coming in original direction from A or M to B.
+ * Inner saddr will be A, inner daddr will be B.
+ * The found conntrack will be original tuple (A->B).
+ *
+ * In both cases, conntrack[dir].dst == inner.dst.
+ *
+ * A bogus packet could look like this:
+ * Inner: B->T
+ * Outer: B->X (other machine reachable by T).
+ *
+ * In this case, lookup yields connection A->B and will
+ * set packet from B->X as *RELATED*, even though no connection
+ * from X was ever seen.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ dir = NF_CT_DIRECTION(h);
+ ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+ if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+ if (state->pf == AF_INET) {
+ nf_l4proto_log_invalid(skb, state,
+ l4proto,
+ "outer daddr %pI4 != inner %pI4",
+ &outer_daddr->ip, &ct_daddr->ip);
+ } else if (state->pf == AF_INET6) {
+ nf_l4proto_log_invalid(skb, state,
+ l4proto,
+ "outer daddr %pI6 != inner %pI6",
+ &outer_daddr->ip6, &ct_daddr->ip6);
+ }
+ nf_ct_put(ct);
+ return -NF_ACCEPT;
+ }
+
+ ctinfo = IP_CT_RELATED;
+ if (dir == IP_CT_DIR_REPLY)
+ ctinfo += IP_CT_IS_REPLY;
+
+ /* Update skb to refer to this connection */
+ nf_ct_set(skb, ct, ctinfo);
+ return NF_ACCEPT;
+}
+
+static void icmp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg);
+}
+
+/* Small and modified version of icmp_rcv */
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ union nf_inet_addr outer_daddr;
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+ if (icmph == NULL) {
+ icmp_error_log(skb, state, "short packet");
+ return -NF_ACCEPT;
+ }
+
+ /* See nf_conntrack_proto_tcp.c */
+ if (state->net->ct.sysctl_checksum &&
+ state->hook == NF_INET_PRE_ROUTING &&
+ nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
+ icmp_error_log(skb, state, "bad hw icmp checksum");
+ return -NF_ACCEPT;
+ }
+
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery.
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP message types MUST be silently
+ * discarded.
+ */
+ if (icmph->type > NR_ICMP_TYPES) {
+ icmp_error_log(skb, state, "invalid icmp type");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (!icmp_is_err(icmph->type))
+ return NF_ACCEPT;
+
+ memset(&outer_daddr, 0, sizeof(outer_daddr));
+ outer_daddr.ip = ip_hdr(skb)->daddr;
+
+ dataoff += sizeof(*icmph);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMP, &outer_daddr);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple,
+ u_int32_t flags)
+{
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) {
+ if (!tb[CTA_PROTO_ICMP_TYPE])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+ if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type])
+ return -EINVAL;
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) {
+ if (!tb[CTA_PROTO_ICMP_CODE])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) {
+ if (!tb[CTA_PROTO_ICMP_ID])
+ return -EINVAL;
+
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+ }
+
+ return 0;
+}
+
+static unsigned int icmp_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+ if (!timeout)
+ timeout = &in->timeout;
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+ } else if (timeout) {
+ /* Set default ICMP timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+ [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_icmp_init_net(struct net *net)
+{
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
+
+ in->timeout = nf_ct_icmp_timeout;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
+{
+ .l4proto = IPPROTO_ICMP,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = icmp_tuple_to_nlattr,
+ .nlattr_tuple_size = icmp_nlattr_tuple_size,
+ .nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nla_policy = icmp_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000..1020d6760
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+#include "nf_internals.h"
+
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
+
+bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct net *net,
+ struct nf_conntrack_tuple *tuple)
+{
+ const struct icmp6hdr *hp;
+ struct icmp6hdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+ tuple->dst.u.icmp.type = hp->icmp6_type;
+ tuple->src.u.icmp.id = hp->icmp6_identifier;
+ tuple->dst.u.icmp.code = hp->icmp6_code;
+
+ return true;
+}
+
+/* Map each type to its inverse type + 1, so 0 can mean "no mapping";
+ * unlisted slots stay 0. */
+static const u_int8_t invmap[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
+ [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
+ [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
+};
+
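+/* ICMPv6 types that must not be tracked: MLD and neighbour discovery
+ * messages. Indexed by type - 130 (ICMPV6_MGM_QUERY is the lowest).
+ */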
+static const u_int8_t noct_valid_new[] = {
+ [ICMPV6_MGM_QUERY - 130] = 1,
+ [ICMPV6_MGM_REPORT - 130] = 1,
+ [ICMPV6_MGM_REDUCTION - 130] = 1,
+ [NDISC_ROUTER_SOLICITATION - 130] = 1,
+ [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+ [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+ [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+ [ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ int type = orig->dst.u.icmp.type - 128;
+
+ if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+ return &nf_icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ unsigned int *timeout = nf_ct_timeout_lookup(ct);
+ static const u8 valid_new[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = 1,
+ [ICMPV6_NI_QUERY - 128] = 1
+ };
+
+ if (state->pf != NFPROTO_IPV6)
+ return -NF_ACCEPT;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+ if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+ /* Can't create a new ICMPv6 `conn' with this. */
+ pr_debug("icmpv6: can't create new conn with type %u\n",
+ type + 128);
+ nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+ return -NF_ACCEPT;
+ }
+ }
+
+ if (!timeout)
+ timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+ return NF_ACCEPT;
+}
+
+
+static void icmpv6_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
+}
+
+static noinline_for_stack int
+nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ u8 hl = ipv6_hdr(skb)->hop_limit;
+ union nf_inet_addr outer_daddr;
+ union {
+ struct nd_opt_hdr nd_opt;
+ struct rd_msg rd_msg;
+ } tmp;
+ const struct nd_opt_hdr *nd_opt;
+ const struct rd_msg *rd_msg;
+
+ rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg);
+ if (!rd_msg) {
+ icmpv6_error_log(skb, state, "short redirect");
+ return -NF_ACCEPT;
+ }
+
+ if (rd_msg->icmph.icmp6_code != 0)
+ return NF_ACCEPT;
+
+ if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect");
+ return -NF_ACCEPT;
+ }
+
+ dataoff += sizeof(*rd_msg);
+
+ /* warning: rd_msg no longer usable after this call */
+ nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt);
+ if (!nd_opt || nd_opt->nd_opt_len == 0) {
+ icmpv6_error_log(skb, state, "redirect without options");
+ return -NF_ACCEPT;
+ }
+
+ /* We could call ndisc_parse_options(), but it would need
+ * skb_linearize() and a bit more work.
+ */
+ if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR)
+ return NF_ACCEPT;
+
+ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+ sizeof(outer_daddr.ip6));
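+ /* The redirected-header option starts with type (1 byte), length
+ * (1 byte) and 6 reserved bytes before the embedded IP header
+ * (RFC 4861), hence the fixed 8 byte offset.
+ */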
+ dataoff += 8;
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMPV6, &outer_daddr);
+}
+
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ union nf_inet_addr outer_daddr;
+ const struct icmp6hdr *icmp6h;
+ struct icmp6hdr _ih;
+ int type;
+
+ icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+ if (icmp6h == NULL) {
+ icmpv6_error_log(skb, state, "short packet");
+ return -NF_ACCEPT;
+ }
+
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
+ icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
+ return -NF_ACCEPT;
+ }
+
+ type = icmp6h->icmp6_type - 130;
+ if (type >= 0 && type < sizeof(noct_valid_new) &&
+ noct_valid_new[type]) {
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ return NF_ACCEPT;
+ }
+
+ if (icmp6h->icmp6_type == NDISC_REDIRECT)
+ return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state);
+
+ /* not an error message? */
+ if (icmp6h->icmp6_type >= 128)
+ return NF_ACCEPT;
+
+ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+ sizeof(outer_daddr.ip6));
+ dataoff += sizeof(*icmp6h);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMPV6, &outer_daddr);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple,
+ u_int32_t flags)
+{
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) {
+ if (!tb[CTA_PROTO_ICMPV6_TYPE])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+ if (tuple->dst.u.icmp.type < 128 ||
+ tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type - 128])
+ return -EINVAL;
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) {
+ if (!tb[CTA_PROTO_ICMPV6_CODE])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+ }
+
+ if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) {
+ if (!tb[CTA_PROTO_ICMPV6_ID])
+ return -EINVAL;
+
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+ }
+
+ return 0;
+}
+
+static unsigned int icmpv6_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
+
+ if (!timeout)
+ timeout = icmpv6_get_timeouts(net);
+ if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMPv6 timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+ [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_icmpv6_init_net(struct net *net)
+{
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
+
+ in->timeout = nf_ct_icmpv6_timeout;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
+{
+ .l4proto = IPPROTO_ICMPV6,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
+ .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
+ .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nla_policy = icmpv6_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmpv6_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
new file mode 100644
index 000000000..c94a9971d
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Connection tracking protocol helper module for SCTP.
+ *
+ * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com>
+ * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * SCTP is defined in RFC 2960. References to various sections in this code
+ * are to this RFC.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <net/sctp/checksum.h>
+
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+
+static const char *const sctp_conntrack_names[] = {
+ [SCTP_CONNTRACK_NONE] = "NONE",
+ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
+ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
+};
+
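+/* Readability helpers: "10 SECS" expands to "10 * HZ", "2 MINS" to
+ * "2 * 60 * HZ", and so on.
+ */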
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
+ [SCTP_CONNTRACK_CLOSED] = 10 SECS,
+ [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
+};
+
+#define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
+
+#define sNO SCTP_CONNTRACK_NONE
+#define sCL SCTP_CONNTRACK_CLOSED
+#define sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define sES SCTP_CONNTRACK_ESTABLISHED
+#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
+#define sIV SCTP_CONNTRACK_MAX
+
+/*
+ These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end
+point. Please note the subtleties. -Kiran
+
+NONE - Nothing so far.
+COOKIE WAIT - We have seen an INIT chunk in the original direction, or an
+ INIT_ACK chunk in the reply direction.
+COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+ to that of the SHUTDOWN chunk.
+CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
+ the SHUTDOWN chunk. Connection is closed.
+HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction.
+ This messes things up when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Full Multi Homing support.
+*/
+
+/* SCTP conntrack state transitions */
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+ },
+ {
+/* REPLY */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
+/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
+ }
+};
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+/* Print out the private part of the conntrack. */
+static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+{
+ seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
+}
+#endif
+
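+/* Walk all chunks in the packet: advance by the chunk length rounded up
+ * to the next 4-byte boundary (chunks are 32-bit aligned, RFC 2960
+ * Sec 3.2) until skb_header_pointer() fails or the skb is exhausted.
+ */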
+#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
+for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
+ (offset) < (skb)->len && \
+ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
+ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
+
+/* Some validity checks to make sure the chunks are fine */
+static int do_basic_checks(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ unsigned long *map)
+{
+ u_int32_t offset, count;
+ struct sctp_chunkhdr _sch, *sch;
+ int flag;
+
+ flag = 0;
+
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
+
+ if (sch->type == SCTP_CID_INIT ||
+ sch->type == SCTP_CID_INIT_ACK ||
+ sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
+ flag = 1;
+
+ /*
+ * Cookie Ack/Echo chunks not the first OR
+ * Init / Init Ack / Shutdown compl chunks not the only chunks
+ * OR zero-length.
+ */
+ if (((sch->type == SCTP_CID_COOKIE_ACK ||
+ sch->type == SCTP_CID_COOKIE_ECHO ||
+ flag) &&
+ count != 0) || !sch->length) {
+ pr_debug("Basic checks failed\n");
+ return 1;
+ }
+
+ if (map)
+ set_bit(sch->type, map);
+ }
+
+ pr_debug("Basic checks passed\n");
+ return count == 0;
+}
+
+static int sctp_new_state(enum ip_conntrack_dir dir,
+ enum sctp_conntrack cur_state,
+ int chunk_type)
+{
+ int i;
+
+ pr_debug("Chunk type: %d\n", chunk_type);
+
+ switch (chunk_type) {
+ case SCTP_CID_INIT:
+ pr_debug("SCTP_CID_INIT\n");
+ i = 0;
+ break;
+ case SCTP_CID_INIT_ACK:
+ pr_debug("SCTP_CID_INIT_ACK\n");
+ i = 1;
+ break;
+ case SCTP_CID_ABORT:
+ pr_debug("SCTP_CID_ABORT\n");
+ i = 2;
+ break;
+ case SCTP_CID_SHUTDOWN:
+ pr_debug("SCTP_CID_SHUTDOWN\n");
+ i = 3;
+ break;
+ case SCTP_CID_SHUTDOWN_ACK:
+ pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
+ i = 4;
+ break;
+ case SCTP_CID_ERROR:
+ pr_debug("SCTP_CID_ERROR\n");
+ i = 5;
+ break;
+ case SCTP_CID_COOKIE_ECHO:
+ pr_debug("SCTP_CID_COOKIE_ECHO\n");
+ i = 6;
+ break;
+ case SCTP_CID_COOKIE_ACK:
+ pr_debug("SCTP_CID_COOKIE_ACK\n");
+ i = 7;
+ break;
+ case SCTP_CID_SHUTDOWN_COMPLETE:
+ pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
+ i = 8;
+ break;
+ case SCTP_CID_HEARTBEAT:
+ pr_debug("SCTP_CID_HEARTBEAT");
+ i = 9;
+ break;
+ case SCTP_CID_HEARTBEAT_ACK:
+ pr_debug("SCTP_CID_HEARTBEAT_ACK");
+ i = 10;
+ break;
+ default:
+ /* Other chunks like DATA or SACK do not change the state */
+ pr_debug("Unknown chunk type, Will stay in %s\n",
+ sctp_conntrack_names[cur_state]);
+ return cur_state;
+ }
+
+ pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
+ dir, sctp_conntrack_names[cur_state], chunk_type,
+ sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
+
+ return sctp_conntracks[dir][i][cur_state];
+}
+
+/* Don't need lock here: this conntrack not in circulation yet */
+static noinline bool
+sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ const struct sctphdr *sh, unsigned int dataoff)
+{
+ enum sctp_conntrack new_state;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ u32 offset, count;
+
+ memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
+ new_state = SCTP_CONNTRACK_MAX;
+ for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) {
+ new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+ SCTP_CONNTRACK_NONE, sch->type);
+
+ /* Invalid: delete conntrack */
+ if (new_state == SCTP_CONNTRACK_NONE ||
+ new_state == SCTP_CONNTRACK_MAX) {
+ pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
+ return false;
+ }
+
+ /* Copy the vtag into the state info */
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _inithdr, *ih;
+ /* Sec 8.5.1 (A) */
+ if (sh->vtag)
+ return false;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch),
+ sizeof(_inithdr), &_inithdr);
+ if (!ih)
+ return false;
+
+ pr_debug("Setting vtag %x for new conn\n",
+ ih->init_tag);
+
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ pr_debug("Setting vtag %x for secondary conntrack\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
+ } else {
+ /* If it is a shutdown ack OOTB packet, we expect a return
+ * shutdown complete, otherwise an ABORT (Sec 8.4 (5) and (8)) */
+ pr_debug("Setting vtag %x for new conn OOTB\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+ }
+
+ ct->proto.sctp.state = SCTP_CONNTRACK_NONE;
+ }
+
+ return true;
+}
+
+static bool sctp_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ const struct sctphdr *sh;
+ const char *logmsg;
+
+ if (skb->len < dataoff + sizeof(struct sctphdr)) {
+ logmsg = "nf_ct_sctp: short packet ";
+ goto out_invalid;
+ }
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ skb->ip_summed == CHECKSUM_NONE) {
+ if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
+ logmsg = "nf_ct_sctp: failed to read header ";
+ goto out_invalid;
+ }
+ sh = (const struct sctphdr *)(skb->data + dataoff);
+ if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
+ logmsg = "nf_ct_sctp: bad CRC ";
+ goto out_invalid;
+ }
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return false;
+out_invalid:
+ nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg);
+ return true;
+}
+
+/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
+int nf_conntrack_sctp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ enum sctp_conntrack new_state, old_state;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ const struct sctphdr *sh;
+ struct sctphdr _sctph;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ u_int32_t offset, count;
+ unsigned int *timeouts;
+ unsigned long map[256 / sizeof(unsigned long)] = { 0 };
+ bool ignore = false;
+
+ if (sctp_error(skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ goto out;
+
+ if (do_basic_checks(ct, skb, dataoff, map) != 0)
+ goto out;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ /* If an OOTB packet has any of these chunks, discard it (Sec 8.4) */
+ if (test_bit(SCTP_CID_ABORT, map) ||
+ test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+ test_bit(SCTP_CID_COOKIE_ACK, map))
+ return -NF_ACCEPT;
+
+ if (!sctp_new(ct, skb, sh, dataoff))
+ return -NF_ACCEPT;
+ }
+
+ /* Check the verification tag (Sec 8.5) */
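+ /* Chunk types with chunk-specific vtag rules (INIT, ABORT,
+ * SHUTDOWN_COMPLETE, ...) are exempted here and handled per
+ * chunk in the loop below (Sec 8.5.1).
+ */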
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out;
+ }
+
+ old_state = new_state = SCTP_CONNTRACK_NONE;
+ spin_lock_bh(&ct->lock);
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ /* Special cases of Verification tag check (Sec 8.5.1) */
+ if (sch->type == SCTP_CID_INIT) {
+ /* (A) vtag MUST be zero */
+ if (sh->vtag != 0)
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_ABORT) {
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
+ if (sh->vtag != ct->proto.sctp.vtag[dir])
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ACK) {
+ ct->proto.sctp.init[dir] = 0;
+ ct->proto.sctp.init[!dir] = 0;
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ if (ct->proto.sctp.vtag[dir] == 0) {
+ pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
+ if (test_bit(SCTP_CID_DATA, map) || ignore)
+ goto out_unlock;
+
+ ct->proto.sctp.flags |= SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
+ ct->proto.sctp.last_dir = dir;
+ ignore = true;
+ continue;
+ } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
+ ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
+ }
+ } else if (sch->type == SCTP_CID_HEARTBEAT_ACK) {
+ if (ct->proto.sctp.vtag[dir] == 0) {
+ pr_debug("Setting vtag %x for dir %d\n",
+ sh->vtag, dir);
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
+ if (test_bit(SCTP_CID_DATA, map) || ignore)
+ goto out_unlock;
+
+ if ((ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) == 0 ||
+ ct->proto.sctp.last_dir == dir)
+ goto out_unlock;
+
+ ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ ct->proto.sctp.vtag[!dir] = 0;
+ } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
+ ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
+ }
+ }
+
+ old_state = ct->proto.sctp.state;
+ new_state = sctp_new_state(dir, old_state, sch->type);
+
+ /* Invalid */
+ if (new_state == SCTP_CONNTRACK_MAX) {
+ pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
+ "conntrack=%u\n",
+ dir, sch->type, old_state);
+ goto out_unlock;
+ }
+
+ /* If it is an INIT or an INIT ACK note down the vtag */
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _ih, *ih;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
+ goto out_unlock;
+
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+ ct->proto.sctp.init[!dir] = 0;
+ ct->proto.sctp.init[dir] = 1;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+ /* don't renew timeout on init retransmit so
+ * port reuse by client or NAT middlebox cannot
+ * keep entry alive indefinitely (incl. nat info).
+ */
+ if (new_state == SCTP_CONNTRACK_CLOSED &&
+ old_state == SCTP_CONNTRACK_CLOSED &&
+ nf_ct_is_confirmed(ct))
+ ignore = true;
+ } else if (sch->type == SCTP_CID_INIT_ACK) {
+ struct sctp_inithdr _ih, *ih;
+ __be32 vtag;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
+ goto out_unlock;
+
+ vtag = ct->proto.sctp.vtag[!dir];
+ if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
+ goto out_unlock;
+ /* collision */
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
+ vtag != ih->init_tag)
+ goto out_unlock;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
+ }
+
+ ct->proto.sctp.state = new_state;
+ if (old_state != new_state) {
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
+ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
+ }
+ spin_unlock_bh(&ct->lock);
+
+ /* allow but do not refresh timeout */
+ if (ignore)
+ return NF_ACCEPT;
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
+
+ return NF_ACCEPT;
+
+out_unlock:
+ spin_unlock_bh(&ct->lock);
+out:
+ return -NF_ACCEPT;
+}
+
+static bool sctp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.sctp.state) {
+ case SCTP_CONNTRACK_SHUTDOWN_SENT:
+ case SCTP_CONNTRACK_SHUTDOWN_RECD:
+ case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+ struct nf_conn *ct, bool destroy)
+{
+ struct nlattr *nest_parms;
+
+ spin_lock_bh(&ct->lock);
+ nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state))
+ goto nla_put_failure;
+
+ if (destroy)
+ goto skip_state;
+
+ if (nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) ||
+ nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY,
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY]))
+ goto nla_put_failure;
+
+skip_state:
+ spin_unlock_bh(&ct->lock);
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ spin_unlock_bh(&ct->lock);
+ return -1;
+}
+
+static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
+ [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 },
+ [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 },
+};
+
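+/* Worst-case netlink payload for the SCTP protoinfo attributes: u8 state
+ * plus the two 32-bit verification tags, each rounded up by NLA_ALIGN.
+ */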
+#define SCTP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 4) + \
+ NLA_ALIGN(NLA_HDRLEN + 4))
+
+static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
+{
+ struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
+ struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1];
+ int err;
+
+ /* updates may not contain the internal protocol info, skip parsing */
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_SCTP_MAX, attr,
+ sctp_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[CTA_PROTOINFO_SCTP_STATE] ||
+ !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] ||
+ !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
+ return -EINVAL;
+
+ spin_lock_bh(&ct->lock);
+ ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
+ nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+ nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
+ spin_unlock_bh(&ct->lock);
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeouts = data;
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
+ int i;
+
+ if (!timeouts)
+ timeouts = sn->timeouts;
+
+ /* set default SCTP timeouts. */
+ for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+ timeouts[i] = sn->timeouts[i];
+
+ /* there's a 1:1 mapping between attributes and protocol states. */
+ for (i = CTA_TIMEOUT_SCTP_UNSPEC + 1; i < CTA_TIMEOUT_SCTP_MAX + 1; i++) {
+ if (tb[i])
+ timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
+ }
+
+ timeouts[CTA_TIMEOUT_SCTP_UNSPEC] = timeouts[CTA_TIMEOUT_SCTP_CLOSED];
+ return 0;
+}
+
+static int
+sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeouts = data;
+ int i;
+
+ for (i = CTA_TIMEOUT_SCTP_UNSPEC + 1; i < CTA_TIMEOUT_SCTP_MAX + 1; i++) {
+ if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
+ goto nla_put_failure;
+ }
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
+ [CTA_TIMEOUT_SCTP_CLOSED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_COOKIE_WAIT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_COOKIE_ECHOED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_ESTABLISHED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_sctp_init_net(struct net *net)
+{
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
+ int i;
+
+ for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+ sn->timeouts[i] = sctp_timeouts[i];
+
+ /* timeouts[0] is unused, init it so ->timeouts[0] contains
+ * 'new' timeout, like udp or icmp.
+ */
+ sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
+ .l4proto = IPPROTO_SCTP,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ .print_conntrack = sctp_print_conntrack,
+#endif
+ .can_early_drop = sctp_can_early_drop,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = SCTP_NLATTR_SIZE,
+ .to_nlattr = sctp_to_nlattr,
+ .from_nlattr = nlattr_to_sctp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = sctp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = sctp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_SCTP_MAX,
+ .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
+ .nla_policy = sctp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
new file mode 100644
index 000000000..3ac1af6f5
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -0,0 +1,1616 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <asm/unaligned.h>
+
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+
+ /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR */
+
+static const char *const tcp_conntrack_names[] = {
+ "NONE",
+ "SYN_SENT",
+ "SYN_RECV",
+ "ESTABLISHED",
+ "FIN_WAIT",
+ "CLOSE_WAIT",
+ "LAST_ACK",
+ "TIME_WAIT",
+ "CLOSE",
+ "SYN_SENT2",
+};
+
+enum nf_ct_tcp_action {
+ NFCT_TCP_IGNORE,
+ NFCT_TCP_INVALID,
+ NFCT_TCP_ACCEPT,
+};
+
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
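+/* Illustrative note (not part of the upstream code): the token-pasting
+ * macros above let timeouts be written in natural units that expand to
+ * jiffies, e.g.:
+ *
+ *   2 MINS  ->  2 * 60 * HZ
+ *   5 DAYS  ->  5 * 24 * 60 * 60 * HZ
+ *
+ * so every entry in the table below is a plain jiffies count.
+ */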
+
+static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
+ [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
+ [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
+ [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
+ [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
+ [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
+ [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
+ [TCP_CONNTRACK_CLOSE] = 10 SECS,
+ [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+ Linux uses 15 packets as limit, which corresponds
+ to ~13-30min depending on RTO. */
+ [TCP_CONNTRACK_RETRANS] = 5 MINS,
+ [TCP_CONNTRACK_UNACK] = 5 MINS,
+};
+
+#define sNO TCP_CONNTRACK_NONE
+#define sSS TCP_CONNTRACK_SYN_SENT
+#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
+#define sFW TCP_CONNTRACK_FIN_WAIT
+#define sCW TCP_CONNTRACK_CLOSE_WAIT
+#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
+#define sS2 TCP_CONNTRACK_SYN_SENT2
+#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
+
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+enum tcp_bit_set {
+ TCP_SYN_SET,
+ TCP_SYNACK_SET,
+ TCP_FIN_SET,
+ TCP_ACK_SET,
+ TCP_RST_SET,
+ TCP_NONE_SET,
+};
+
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent to those of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meanings of the states are:
+ *
+ * NONE: initial state
+ * SYN_SENT: SYN-only packet seen
+ * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
+ * SYN_RECV: SYN-ACK packet seen
+ * ESTABLISHED: ACK packet seen
+ * FIN_WAIT: FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN)
+ * LAST_ACK: FIN seen (after FIN)
+ * TIME_WAIT: last ACK seen
+ * CLOSE: closed connection (RST)
+ *
+ * Packets marked as IGNORED (sIG):
+ *	packets that may be either valid or invalid,
+ *	where the receiver may send back a connection-closing
+ *	RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ *	packets we regard as truly invalid.
+ *
+ * (An illustrative lookup example follows the table below.)
+ */
+static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
+/*
+ * sNO -> sSS Initialize a new connection
+ * sSS -> sSS Retransmitted SYN
+ * sS2 -> sS2 Late retransmitted SYN
+ * sSR -> sIG
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sSS Reopened connection (RFC 1122).
+ * sCL -> sSS
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
+/*
+ * sNO -> sIV Too late and no reason to do anything
+ * sSS -> sIV Client can't send SYN and then SYN/ACK
+ * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
+ * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
+ * sES -> sIV Invalid SYN/ACK packets sent by the client
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV
+ * sCL -> sIV
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sNO -> sIV Too late and no reason to do anything...
+ * sSS -> sIV Client might not send FIN in this state:
+ * we enforce waiting for a SYN/ACK reply first.
+ * sS2 -> sIV
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions, waiting for
+ * the last ACK.
+ * Might be a retransmitted FIN as well...
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN. Remain in the same state.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sNO -> sES Assumed.
+ * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sS2 -> sIV
+ * sSR -> sES Established state is reached.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected (or an RFC 5961 challenge ACK)
+ * sTW -> sTW Retransmitted last ACK. Remain in the same state.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ },
+ {
+/* REPLY */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
+/*
+ * sNO -> sIV Never reached.
+ * sSS -> sS2 Simultaneous open
+ * sS2 -> sS2 Retransmitted simultaneous SYN
+ * sSR -> sIV Invalid SYN packets sent by the server
+ * sES -> sIV
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sSS Reopened connection, but server may have switched role
+ * sCL -> sIV
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
+/*
+ * sSS -> sSR Standard open.
+ * sS2 -> sSR Simultaneous open
+ * sSR -> sIG Retransmitted SYN/ACK, ignore it.
+ * sES -> sIG Late retransmitted SYN/ACK?
+ * sFW -> sIG Might be SYN/ACK answering ignored SYN
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sSS -> sIV Server might not send FIN in this state.
+ * sS2 -> sIV
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions.
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
+/*
+ * sSS -> sIG Might be a half-open connection.
+ * sS2 -> sIG
+ * sSR -> sSR Might answer late resent SYN.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected (or an RFC 5961 challenge ACK)
+ * sTW -> sTW Retransmitted last ACK.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ }
+};
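+#if 0 /* Illustrative sketch, not upstream code: indexing the table above */
+static enum tcp_conntrack example_lookup(void)
+{
+	/* A SYN (TCP_SYN_SET) seen in the ORIGINAL direction while the
+	 * connection is still in NONE state yields SYN_SENT (sSS):
+	 */
+	return tcp_conntracks[IP_CT_DIR_ORIGINAL][TCP_SYN_SET][TCP_CONNTRACK_NONE];
+}
+#endif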
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+/* Print out the private part of the conntrack. */
+static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
+ seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
+}
+#endif
+
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+ if (tcph->rst) return TCP_RST_SET;
+ else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+ else if (tcph->fin) return TCP_FIN_SET;
+ else if (tcph->ack) return TCP_ACK_SET;
+ else return TCP_NONE_SET;
+}
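+/* Illustrative note (not part of the upstream code): the checks above
+ * are ordered by precedence, so a segment with both RST and ACK set
+ * maps to TCP_RST_SET, and SYN|ACK maps to TCP_SYNACK_SET rather than
+ * TCP_ACK_SET.
+ */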
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+ in IP Filter' by Guido van Rooij.
+
+ http://www.sane.nl/events/sane2000/papers.html
+ http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
+
+ The boundaries and the conditions are changed according to RFC793:
+ the packet must intersect the window (i.e. segments may be
+ after the right or before the left edge) and thus receivers may ACK
+ segments after the right edge of the window.
+
+ td_maxend = max(sack + max(win,1)) seen in reply packets
+ td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_maxwin += seq + len - sender.td_maxend
+ if seq + len > sender.td_maxend
+ td_end = max(seq + len) seen in sent packets
+
+ I. Upper bound for valid data: seq <= sender.td_maxend
+ II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
+ III. Upper bound for valid (s)ack: sack <= receiver.td_end
+ IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
+
+ where sack is the highest right edge of sack block found in the packet
+ or ack in the case of packet without SACK option.
+
+ The upper bound for a valid (s)ack is not ignored -
+ we don't have to deal with fragments.
+*/
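+/* Illustrative note (not part of the upstream code): a worked example
+ * of bounds I-IV with made-up numbers. Suppose sender.td_end = 1000,
+ * sender.td_maxend = 1500, receiver.td_end = 400 and
+ * receiver.td_maxwin = 600, and a segment arrives with seq = 1200,
+ * 100 bytes of data and (s)ack = 350:
+ *
+ *   I.   seq (1200)       <= sender.td_maxend (1500)              ok
+ *   II.  seq + len (1300) >= sender.td_end - receiver.td_maxwin
+ *                            = 1000 - 600 = 400                   ok
+ *   III. sack (350)       <= receiver.td_end (400)                ok
+ *   IV.  sack (350)       >= receiver.td_end - MAXACKWINDOW,
+ *                            which is far below 350 here          ok
+ *
+ * so the segment would be accepted as in-window.
+ */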
+
+static inline __u32 segment_seq_plus_len(__u32 seq,
+ size_t len,
+ unsigned int dataoff,
+ const struct tcphdr *tcph)
+{
+ /* XXX Should I use payload length field in IP/IPv6 header ?
+ * - YK */
+ return (seq + len - dataoff - tcph->doff*4
+ + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
+}
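+/* Illustrative note (not part of the upstream code): for a segment with
+ * seq = 100 carrying 20 bytes of payload and the FIN flag set, this
+ * returns 100 + 20 + 1 = 121, the sequence number just past the
+ * segment; SYN and FIN each consume one sequence number.
+ */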
+
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST 66000
+#define MAXACKWINDOW(sender) \
+ ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
+ : MAXACKWINCONST)
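+/* Illustrative note (not part of the upstream code): MAXACKWINDOW(sender)
+ * is simply max(sender->td_maxwin, 66000): a sender with
+ * td_maxwin = 80000 gets an ACK window of 80000, while one with
+ * td_maxwin = 5840 still gets the 66000 floor.
+ */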
+
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ */
+static void tcp_options(const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *tcph,
+ struct ip_ct_tcp_state *state)
+{
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+ const unsigned char *ptr;
+ int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+ if (!length)
+ return;
+
+ ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+ length, buff);
+ if (!ptr)
+ return;
+
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+
+ while (length > 0) {
+ int opcode=*ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ if (length < 2)
+ return;
+ opsize=*ptr++;
+ if (opsize < 2) /* "silly options" */
+ return;
+ if (opsize > length)
+ return; /* don't parse partial options */
+
+ if (opcode == TCPOPT_SACK_PERM
+ && opsize == TCPOLEN_SACK_PERM)
+ state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+ else if (opcode == TCPOPT_WINDOW
+ && opsize == TCPOLEN_WINDOW) {
+ state->td_scale = *(u_int8_t *)ptr;
+
+ if (state->td_scale > TCP_MAX_WSCALE)
+ state->td_scale = TCP_MAX_WSCALE;
+
+ state->flags |=
+ IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+}
+
+static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
+ const struct tcphdr *tcph, __u32 *sack)
+{
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+ const unsigned char *ptr;
+ int length = (tcph->doff*4) - sizeof(struct tcphdr);
+ __u32 tmp;
+
+ if (!length)
+ return;
+
+ ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+ length, buff);
+ if (!ptr)
+ return;
+
+ /* Fast path for timestamp-only option */
+ if (length == TCPOLEN_TSTAMP_ALIGNED
+ && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+ | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8)
+ | TCPOLEN_TIMESTAMP))
+ return;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize, i;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ if (length < 2)
+ return;
+ opsize = *ptr++;
+ if (opsize < 2) /* "silly options" */
+ return;
+ if (opsize > length)
+ return; /* don't parse partial options */
+
+ if (opcode == TCPOPT_SACK
+ && opsize >= (TCPOLEN_SACK_BASE
+ + TCPOLEN_SACK_PERBLOCK)
+ && !((opsize - TCPOLEN_SACK_BASE)
+ % TCPOLEN_SACK_PERBLOCK)) {
+ for (i = 0;
+ i < (opsize - TCPOLEN_SACK_BASE);
+ i += TCPOLEN_SACK_PERBLOCK) {
+ tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
+
+ if (after(tmp, *sack))
+ *sack = tmp;
+ }
+ return;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+}
+
+static void tcp_init_sender(struct ip_ct_tcp_state *sender,
+ struct ip_ct_tcp_state *receiver,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *tcph,
+ u32 end, u32 win)
+{
+ /* SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, dataoff, tcph, sender);
+ /* RFC 1323:
+ * Both sides must send the Window Scale option
+ * to enable window scaling in either direction.
+ */
+ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+ sender->td_scale = 0;
+ receiver->td_scale = 0;
+ }
+}
+
+__printf(6, 7)
+static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const struct nf_hook_state *state,
+ const struct ip_ct_tcp_state *sender,
+ enum nf_ct_tcp_action ret,
+ const char *fmt, ...)
+{
+ const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct));
+ struct va_format vaf;
+ va_list args;
+ bool be_liberal;
+
+ be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal;
+ if (be_liberal)
+ return NFCT_TCP_ACCEPT;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf);
+ va_end(args);
+
+ return ret;
+}
+
+static enum nf_ct_tcp_action
+tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
+ unsigned int index, const struct sk_buff *skb,
+ unsigned int dataoff, const struct tcphdr *tcph,
+ const struct nf_hook_state *hook_state)
+{
+ struct ip_ct_tcp *state = &ct->proto.tcp;
+ struct ip_ct_tcp_state *sender = &state->seen[dir];
+ struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+ __u32 seq, ack, sack, end, win, swin;
+ bool in_recv_win, seq_ok;
+ s32 receiver_offset;
+ u16 win_raw;
+
+ /*
+ * Get the required data from the packet.
+ */
+ seq = ntohl(tcph->seq);
+ ack = sack = ntohl(tcph->ack_seq);
+ win_raw = ntohs(tcph->window);
+ win = win_raw;
+ end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
+
+ if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+ tcp_sack(skb, dataoff, tcph, &sack);
+
+ /* Take into account NAT sequence number mangling */
+ receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
+ ack -= receiver_offset;
+ sack -= receiver_offset;
+
+ if (sender->td_maxwin == 0) {
+ /*
+ * Initialize sender data.
+ */
+ if (tcph->syn) {
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
+ if (!tcph->ack)
+ /* Simultaneous open */
+ return NFCT_TCP_ACCEPT;
+ } else {
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ sender->td_end = end;
+ swin = win << sender->td_scale;
+ sender->td_maxwin = (swin == 0 ? 1 : swin);
+ sender->td_maxend = end + sender->td_maxwin;
+ if (receiver->td_maxwin == 0) {
+ /* We haven't seen traffic in the other
+ * direction yet but we have to tweak window
+ * tracking to pass III and IV until that
+ * happens.
+ */
+ receiver->td_end = receiver->td_maxend = sack;
+ } else if (sack == receiver->td_end + 1) {
+ /* Likely a reply to a keepalive.
+ * Needed for III.
+ */
+ receiver->td_end++;
+ }
+
+ }
+ } else if (tcph->syn &&
+ after(end, sender->td_end) &&
+ (state->state == TCP_CONNTRACK_SYN_SENT ||
+ state->state == TCP_CONNTRACK_SYN_RECV)) {
+ /*
+ * RFC 793: "if a TCP is reinitialized ... then it need
+ * not wait at all; it must only be sure to use sequence
+ * numbers larger than those recently used."
+ *
+ * Re-init state for this direction, just like for the first
+ * syn(-ack) reply, it might differ in seq, ack or tcp options.
+ */
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
+
+ if (dir == IP_CT_DIR_REPLY && !tcph->ack)
+ return NFCT_TCP_ACCEPT;
+ }
+
+ if (!(tcph->ack)) {
+ /*
+ * If there is no ACK, just pretend it was set and OK.
+ */
+ ack = sack = receiver->td_end;
+ } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
+ (TCP_FLAG_ACK|TCP_FLAG_RST))
+ && (ack == 0)) {
+ /*
+ * Work around broken TCP stacks that set ACK in RST packets
+ * with a zero ack value.
+ */
+ ack = sack = receiver->td_end;
+ }
+
+ if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
+ /*
+ * RST sent answering SYN.
+ */
+ seq = end = sender->td_end;
+
+ seq_ok = before(seq, sender->td_maxend + 1);
+ if (!seq_ok) {
+ u32 overshot = end - sender->td_maxend + 1;
+ bool ack_ok;
+
+ ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
+ in_recv_win = receiver->td_maxwin &&
+ after(end, sender->td_end - receiver->td_maxwin - 1);
+
+ if (in_recv_win &&
+ ack_ok &&
+ overshot <= receiver->td_maxwin &&
+ before(sack, receiver->td_end + 1)) {
+ /* Work around TCPs that send more bytes than allowed by
+ * the receive window.
+ *
+ * If the (marked as invalid) packet is allowed to pass by
+ * the ruleset and the peer acks this data, it's possible that
+ * all future packets will trigger the 'ACK is over upper bound' check.
+ *
+ * Thus, if only the sequence check fails, update td_end so that
+ * a possible ACK for this data can update the internal state.
+ */
+ sender->td_end = end;
+ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "%u bytes more than expected", overshot);
+ }
+
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+ "SEQ is over upper bound %u (over the window of the receiver)",
+ sender->td_maxend + 1);
+ }
+
+ if (!before(sack, receiver->td_end + 1))
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+ "ACK is over upper bound %u (ACKed data not seen yet)",
+ receiver->td_end + 1);
+
+ /* Is the ending sequence in the receive window (if available)? */
+ in_recv_win = !receiver->td_maxwin ||
+ after(end, sender->td_end - receiver->td_maxwin - 1);
+ if (!in_recv_win)
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "SEQ is under lower bound %u (already ACKed data retransmitted)",
+ sender->td_end - receiver->td_maxwin - 1);
+ if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1))
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "ignored ACK under lower bound %u (possible overly delayed)",
+ receiver->td_end - MAXACKWINDOW(sender) - 1);
+
+ /* Take into account window scaling (RFC 1323). */
+ if (!tcph->syn)
+ win <<= sender->td_scale;
+
+ /* Update sender data. */
+ swin = win + (sack - ack);
+ if (sender->td_maxwin < swin)
+ sender->td_maxwin = swin;
+ if (after(end, sender->td_end)) {
+ sender->td_end = end;
+ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+ }
+ if (tcph->ack) {
+ if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
+ sender->td_maxack = ack;
+ sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
+ } else if (after(ack, sender->td_maxack)) {
+ sender->td_maxack = ack;
+ }
+ }
+
+ /* Update receiver data. */
+ if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
+ receiver->td_maxwin += end - sender->td_maxend;
+ if (after(sack + win, receiver->td_maxend - 1)) {
+ receiver->td_maxend = sack + win;
+ if (win == 0)
+ receiver->td_maxend++;
+ }
+ if (ack == receiver->td_end)
+ receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ /* Check retransmissions. */
+ if (index == TCP_ACK_SET) {
+ if (state->last_dir == dir &&
+ state->last_seq == seq &&
+ state->last_ack == ack &&
+ state->last_end == end &&
+ state->last_win == win_raw) {
+ state->retrans++;
+ } else {
+ state->last_dir = dir;
+ state->last_seq = seq;
+ state->last_ack = ack;
+ state->last_end = end;
+ state->last_win = win_raw;
+ state->retrans = 0;
+ }
+ }
+
+ return NFCT_TCP_ACCEPT;
+}
+
+static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ int index,
+ const struct sk_buff *skb,
+ const struct nf_hook_state *hook_state)
+{
+ const unsigned int *timeouts;
+ const struct nf_tcp_net *tn;
+ unsigned int timeout;
+ u32 expires;
+
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ return;
+
+ /* We don't want connections hanging around in ESTABLISHED
+ * state for a long time 'just because' conntrack deemed a FIN/RST
+ * out-of-window.
+ *
+ * Shrink the timeout just like when there is unacked data.
+ * This speeds up eviction of 'dead' connections where the
+ * connection's and conntrack's internal state are out of sync.
+ */
+ switch (index) {
+ case TCP_RST_SET:
+ case TCP_FIN_SET:
+ break;
+ default:
+ return;
+ }
+
+ if (ct->proto.tcp.last_dir != dir &&
+ (ct->proto.tcp.last_index == TCP_FIN_SET ||
+ ct->proto.tcp.last_index == TCP_RST_SET)) {
+ expires = nf_ct_expires(ct);
+ if (expires < 120 * HZ)
+ return;
+
+ tn = nf_tcp_pernet(nf_ct_net(ct));
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = tn->timeouts;
+
+ timeout = READ_ONCE(timeouts[TCP_CONNTRACK_UNACK]);
+ if (expires > timeout) {
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
+ "packet (index %d, dir %d) response for index %d lower timeout to %u",
+ index, dir, ct->proto.tcp.last_index, timeout);
+
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ }
+ } else {
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+ }
+}
+
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
+ TCPHDR_URG) + 1] =
+{
+ [TCPHDR_SYN] = 1,
+ [TCPHDR_SYN|TCPHDR_URG] = 1,
+ [TCPHDR_SYN|TCPHDR_ACK] = 1,
+ [TCPHDR_RST] = 1,
+ [TCPHDR_RST|TCPHDR_ACK] = 1,
+ [TCPHDR_FIN|TCPHDR_ACK] = 1,
+ [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
+ [TCPHDR_ACK] = 1,
+ [TCPHDR_ACK|TCPHDR_URG] = 1,
+};
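+#if 0 /* Illustrative sketch, not upstream code: consulting the flag table */
+static bool example_flags_valid(const struct tcphdr *th)
+{
+	/* PSH/ECE/CWR are masked off first, so e.g. SYN|PSH indexes the
+	 * same slot as plain SYN and is accepted:
+	 */
+	u8 flags = tcp_flag_byte(th) & ~(TCPHDR_ECE | TCPHDR_CWR | TCPHDR_PSH);
+
+	return tcp_valid_flags[flags];
+}
+#endif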
+
+static void tcp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
+}
+
+/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
+static bool tcp_error(const struct tcphdr *th,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ unsigned int tcplen = skb->len - dataoff;
+ u8 tcpflags;
+
+ /* Not whole TCP header or malformed packet */
+ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
+ tcp_error_log(skb, state, "truncated packet");
+ return true;
+ }
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the checksum is assumed to be correct.
+ */
+ /* FIXME: Source route IP option packets --RR */
+ if (state->net->ct.sysctl_checksum &&
+ state->hook == NF_INET_PRE_ROUTING &&
+ nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
+ tcp_error_log(skb, state, "bad checksum");
+ return true;
+ }
+
+ /* Check TCP flags. */
+ tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
+ if (!tcp_valid_flags[tcpflags]) {
+ tcp_error_log(skb, state, "invalid tcp flag combination");
+ return true;
+ }
+
+ return false;
+}
+
+static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *th)
+{
+ enum tcp_conntrack new_state;
+ struct net *net = nf_ct_net(ct);
+ const struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+ /* Don't need lock here: this conntrack not in circulation yet */
+ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
+
+ /* Invalid: delete conntrack */
+ if (new_state >= TCP_CONNTRACK_MAX) {
+ pr_debug("nf_ct_tcp: invalid new deleting.\n");
+ return false;
+ }
+
+ if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+ /* SYN packet */
+ ct->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ dataoff, th);
+ ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (ct->proto.tcp.seen[0].td_maxwin == 0)
+ ct->proto.tcp.seen[0].td_maxwin = 1;
+ ct->proto.tcp.seen[0].td_maxend =
+ ct->proto.tcp.seen[0].td_end;
+
+ tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
+ } else if (tn->tcp_loose == 0) {
+ /* Don't try to pick up connections. */
+ return false;
+ } else {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ ct->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ dataoff, th);
+ ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (ct->proto.tcp.seen[0].td_maxwin == 0)
+ ct->proto.tcp.seen[0].td_maxwin = 1;
+ ct->proto.tcp.seen[0].td_maxend =
+ ct->proto.tcp.seen[0].td_end +
+ ct->proto.tcp.seen[0].td_maxwin;
+
+ /* We assume SACK and liberal window checking to handle
+ * window scaling */
+ ct->proto.tcp.seen[0].flags =
+ ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+ IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ /* tcp_packet will set them */
+ ct->proto.tcp.last_index = TCP_NONE_SET;
+ return true;
+}
+
+static bool tcp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.tcp.state) {
+ case TCP_CONNTRACK_FIN_WAIT:
+ case TCP_CONNTRACK_LAST_ACK:
+ case TCP_CONNTRACK_TIME_WAIT:
+ case TCP_CONNTRACK_CLOSE:
+ case TCP_CONNTRACK_CLOSE_WAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+{
+ state->td_end = 0;
+ state->td_maxend = 0;
+ state->td_maxwin = 0;
+ state->td_maxack = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+int nf_conntrack_tcp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+ struct nf_conntrack_tuple *tuple;
+ enum tcp_conntrack new_state, old_state;
+ unsigned int index, *timeouts;
+ enum nf_ct_tcp_action res;
+ enum ip_conntrack_dir dir;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ unsigned long timeout;
+
+ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return -NF_ACCEPT;
+
+ if (tcp_error(th, skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+ return -NF_ACCEPT;
+
+ spin_lock_bh(&ct->lock);
+ old_state = ct->proto.tcp.state;
+ dir = CTINFO2DIR(ctinfo);
+ index = get_conntrack_index(th);
+ new_state = tcp_conntracks[dir][index][old_state];
+ tuple = &ct->tuplehash[dir].tuple;
+
+ switch (new_state) {
+ case TCP_CONNTRACK_SYN_SENT:
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ break;
+ /* RFC 1122: "When a connection is closed actively,
+ * it MUST linger in TIME-WAIT state for a time 2xMSL
+ * (Maximum Segment Lifetime). However, it MAY accept
+ * a new SYN from the remote TCP to reopen the connection
+ * directly from TIME-WAIT state, if..."
+ * We ignore the conditions because we are in the
+ * TIME-WAIT state anyway.
+ *
+ * Handle aborted connections: we and the server
+ * think there is an existing connection but the client
+ * aborts it and starts a new one.
+ */
+ if (((ct->proto.tcp.seen[dir].flags
+ | ct->proto.tcp.seen[!dir].flags)
+ & IP_CT_TCP_FLAG_CLOSE_INIT)
+ || (ct->proto.tcp.last_dir == dir
+ && ct->proto.tcp.last_index == TCP_RST_SET)) {
+ /* Attempt to reopen a closed/aborted connection.
+ * Delete this connection and look up again. */
+ spin_unlock_bh(&ct->lock);
+
+ /* Only repeat if we can actually remove the timer.
+ * Destruction may already be in progress in process
+ * context and we must give it a chance to terminate.
+ */
+ if (nf_ct_kill(ct))
+ return -NF_REPEAT;
+ return NF_DROP;
+ }
+ fallthrough;
+ case TCP_CONNTRACK_IGNORE:
+ /* Ignored packets:
+ *
+ * Our connection entry may be out of sync, so ignore
+ * packets which may signal the real connection between
+ * the client and the server.
+ *
+ * a) SYN in ORIGINAL
+ * b) SYN/ACK in REPLY
+ * c) ACK in reply direction after initial SYN in original.
+ *
+ * If the ignored packet is invalid, the receiver will send
+ * a RST we'll catch below.
+ */
+ if (index == TCP_SYNACK_SET
+ && ct->proto.tcp.last_index == TCP_SYN_SET
+ && ct->proto.tcp.last_dir != dir
+ && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
+ /* b) This SYN/ACK acknowledges a SYN that we earlier
+ * ignored as invalid. This means that the client and
+ * the server are both in sync, while the firewall is
+ * not. We get in sync from the previously annotated
+ * values.
+ */
+ old_state = TCP_CONNTRACK_SYN_SENT;
+ new_state = TCP_CONNTRACK_SYN_RECV;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
+ ct->proto.tcp.last_end;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
+ ct->proto.tcp.last_end;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
+ ct->proto.tcp.last_win == 0 ?
+ 1 : ct->proto.tcp.last_win;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
+ ct->proto.tcp.last_wscale;
+ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
+ ct->proto.tcp.last_flags;
+ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
+ break;
+ }
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+ ct->proto.tcp.last_seq = ntohl(th->seq);
+ ct->proto.tcp.last_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
+ ct->proto.tcp.last_win = ntohs(th->window);
+
+ /* a) This is a SYN in ORIGINAL. The client and the server
+ * may be in sync but we are not. In that case, we annotate
+ * the TCP options and let the packet go through. If it is a
+ * valid SYN packet, the server will reply with a SYN/ACK, and
+ * then we'll get in sync. Otherwise, the server potentially
+ * responds with a challenge ACK if implementing RFC5961.
+ */
+ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
+ struct ip_ct_tcp_state seen = {};
+
+ ct->proto.tcp.last_flags =
+ ct->proto.tcp.last_wscale = 0;
+ tcp_options(skb, dataoff, th, &seen);
+ if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+ ct->proto.tcp.last_flags |=
+ IP_CT_TCP_FLAG_WINDOW_SCALE;
+ ct->proto.tcp.last_wscale = seen.td_scale;
+ }
+ if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
+ ct->proto.tcp.last_flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ }
+ /* Mark the potential for an RFC 5961 challenge ACK;
+ * this poses a special problem for the LAST_ACK state,
+ * as the ACK is interpreted as ACKing the last FIN.
+ */
+ if (old_state == TCP_CONNTRACK_LAST_ACK)
+ ct->proto.tcp.last_flags |=
+ IP_CT_EXP_CHALLENGE_ACK;
+ }
+
+ /* possible challenge ack reply to syn */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET &&
+ dir == IP_CT_DIR_REPLY)
+ ct->proto.tcp.last_ack = ntohl(th->ack_seq);
+
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "packet (index %d) in dir %d ignored, state %s",
+ index, dir,
+ tcp_conntrack_names[old_state]);
+ return NF_ACCEPT;
+ case TCP_CONNTRACK_MAX:
+ /* Special case for SYN proxy: when the SYN to the server or
+ * the SYN/ACK from the server is lost, the client may transmit
+ * a keep-alive packet while in SYN_SENT state. This needs to
+ * be associated with the original conntrack entry in order to
+ * generate a new SYN with the correct sequence number.
+ */
+ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
+ pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
+ spin_unlock_bh(&ct->lock);
+ return NF_ACCEPT;
+ }
+
+ /* Invalid packet */
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "packet (index %d) in dir %d invalid, state %s",
+ index, dir,
+ tcp_conntrack_names[old_state]);
+ return -NF_ACCEPT;
+ case TCP_CONNTRACK_TIME_WAIT:
+ /* RFC 5961 compliance causes stacks to send a "challenge ACK",
+ * e.g. in response to spurious SYNs. Conntrack MUST
+ * not believe this ACK is acking the last FIN.
+ */
+ if (old_state == TCP_CONNTRACK_LAST_ACK &&
+ index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir != dir &&
+ ct->proto.tcp.last_index == TCP_SYN_SET &&
+ (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
+ /* Detected RFC5961 challenge ACK */
+ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
+ return NF_ACCEPT; /* Don't change state */
+ }
+ break;
+ case TCP_CONNTRACK_SYN_SENT2:
+ /* tcp_conntracks table is not smart enough to handle
+ * simultaneous open.
+ */
+ ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
+ ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
+ new_state = TCP_CONNTRACK_ESTABLISHED;
+ break;
+ case TCP_CONNTRACK_CLOSE:
+ if (index != TCP_RST_SET)
+ break;
+
+ /* If we are closing, tuple might have been re-used already.
+ * last_index, last_ack, and all other ct fields used for
+ * sequence/window validation are outdated in that case.
+ *
+ * As the conntrack can already be expired by GC under pressure,
+ * just skip validation checks.
+ */
+ if (tcp_can_early_drop(ct))
+ goto in_window;
+
+ /* td_maxack might be outdated if we let a SYN through earlier */
+ if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
+ ct->proto.tcp.last_index != TCP_SYN_SET) {
+ u32 seq = ntohl(th->seq);
+
+ /* If we are not in established state and SEQ=0 this is most
+ * likely an answer to a SYN we let go through above (last_index
+ * can be updated due to out-of-order ACKs).
+ */
+ if (seq == 0 && !nf_conntrack_tcp_established(ct))
+ break;
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) &&
+ !tn->tcp_ignore_invalid_rst) {
+ /* Invalid RST */
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
+ return -NF_ACCEPT;
+ }
+
+ if (!nf_conntrack_tcp_established(ct) ||
+ seq == ct->proto.tcp.seen[!dir].td_maxack)
+ break;
+
+ /* Check if rst is part of train, such as
+ * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
+ * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
+ */
+ if (ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == dir &&
+ seq == ct->proto.tcp.last_end)
+ break;
+
+ /* ... RST sequence number doesn't match exactly, keep
+ * established state to allow a possible challenge ACK.
+ */
+ new_state = old_state;
+ }
+ if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+ && ct->proto.tcp.last_index == TCP_SYN_SET)
+ || (!test_bit(IPS_ASSURED_BIT, &ct->status)
+ && ct->proto.tcp.last_index == TCP_ACK_SET))
+ && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
+ /* RST sent to invalid SYN or ACK we had let through
+ * at a) and c) above:
+ *
+ * a) SYN was in window then
+ * c) we hold a half-open connection.
+ *
+ * Delete our connection entry.
+ * We skip window checking, because packet might ACK
+ * segments we ignored. */
+ goto in_window;
+ }
+
+ /* Reset in response to a challenge-ack we let through earlier */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_REPLY &&
+ ntohl(th->seq) == ct->proto.tcp.last_ack)
+ goto in_window;
+
+ break;
+ default:
+ /* Keep compilers happy. */
+ break;
+ }
+
+ res = tcp_in_window(ct, dir, index,
+ skb, dataoff, th, state);
+ switch (res) {
+ case NFCT_TCP_IGNORE:
+ spin_unlock_bh(&ct->lock);
+ return NF_ACCEPT;
+ case NFCT_TCP_INVALID:
+ nf_tcp_handle_invalid(ct, dir, index, skb, state);
+ spin_unlock_bh(&ct->lock);
+ return -NF_ACCEPT;
+ case NFCT_TCP_ACCEPT:
+ break;
+ }
+ in_window:
+ /* From now on we have got in-window packets */
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+
+ pr_debug("tcp_conntracks: ");
+ nf_ct_dump_tuple(tuple);
+ pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+ (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+ (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+ old_state, new_state);
+
+ ct->proto.tcp.state = new_state;
+ if (old_state != new_state
+ && new_state == TCP_CONNTRACK_FIN_WAIT)
+ ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = tn->timeouts;
+
+ if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+ timeout = timeouts[TCP_CONNTRACK_RETRANS];
+ else if (unlikely(index == TCP_RST_SET))
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
+ else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
+ IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
+ timeout = timeouts[TCP_CONNTRACK_UNACK];
+ else if (ct->proto.tcp.last_win == 0 &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+ timeout = timeouts[TCP_CONNTRACK_RETRANS];
+ else
+ timeout = timeouts[new_state];
+ spin_unlock_bh(&ct->lock);
+
+ if (new_state != old_state)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+
+ if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ /* If the only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (th->rst) {
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_ACCEPT;
+ }
+
+ if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
+ /* do not renew timeout on SYN retransmit.
+ *
+ * Else port reuse by client or NAT middlebox can keep
+ * entry alive indefinitely (including nat info).
+ */
+ return NF_ACCEPT;
+ }
+
+ /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+ * pickup with loose=1. Avoid large ESTABLISHED timeout.
+ */
+ if (new_state == TCP_CONNTRACK_ESTABLISHED &&
+ timeout > timeouts[TCP_CONNTRACK_UNACK])
+ timeout = timeouts[TCP_CONNTRACK_UNACK];
+ } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
+ && (old_state == TCP_CONNTRACK_SYN_RECV
+ || old_state == TCP_CONNTRACK_ESTABLISHED)
+ && new_state == TCP_CONNTRACK_ESTABLISHED) {
+ /* Set ASSURED if we see valid ack in ESTABLISHED
+ after SYN_RECV or a valid answer for a picked up
+ connection. */
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
+
+ return NF_ACCEPT;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+ struct nf_conn *ct, bool destroy)
+{
+ struct nlattr *nest_parms;
+ struct nf_ct_tcp_flags tmp = {};
+
+ spin_lock_bh(&ct->lock);
+ nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state))
+ goto nla_put_failure;
+
+ if (destroy)
+ goto skip_state;
+
+ if (nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+ ct->proto.tcp.seen[0].td_scale) ||
+ nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+ ct->proto.tcp.seen[1].td_scale))
+ goto nla_put_failure;
+
+ tmp.flags = ct->proto.tcp.seen[0].flags;
+ if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+ sizeof(struct nf_ct_tcp_flags), &tmp))
+ goto nla_put_failure;
+
+ tmp.flags = ct->proto.tcp.seen[1].flags;
+ if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+ sizeof(struct nf_ct_tcp_flags), &tmp))
+ goto nla_put_failure;
+skip_state:
+ spin_unlock_bh(&ct->lock);
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ spin_unlock_bh(&ct->lock);
+ return -1;
+}
+
+static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
+ [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
+ [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
+};
+
+#define TCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
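+/* Illustrative note (not part of the upstream code): with a 4-byte
+ * NLA_HDRLEN, 4-byte alignment and struct nf_ct_tcp_flags being two
+ * bytes (flags + mask), the bound above works out to
+ * 8 + 8 + 8 + 8 = 32 bytes.
+ */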
+
+static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
+{
+ struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
+ struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
+ int err;
+
+ /* updates may not contain the private protocol info;
+ * skip parsing in that case */
+ if (!pattr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr,
+ tcp_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (tb[CTA_PROTOINFO_TCP_STATE] &&
+ nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
+ return -EINVAL;
+
+ spin_lock_bh(&ct->lock);
+ if (tb[CTA_PROTOINFO_TCP_STATE])
+ ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
+ struct nf_ct_tcp_flags *attr =
+ nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
+ ct->proto.tcp.seen[0].flags &= ~attr->mask;
+ ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
+ struct nf_ct_tcp_flags *attr =
+ nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
+ ct->proto.tcp.seen[1].flags &= ~attr->mask;
+ ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
+ tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
+ ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+ ct->proto.tcp.seen[0].td_scale =
+ nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
+ ct->proto.tcp.seen[1].td_scale =
+ nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
+ }
+ spin_unlock_bh(&ct->lock);
+
+ return 0;
+}
+
+static unsigned int tcp_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+ unsigned int *timeouts = data;
+ int i;
+
+ if (!timeouts)
+ timeouts = tn->timeouts;
+ /* set default TCP timeouts. */
+ for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
+ timeouts[i] = tn->timeouts[i];
+
+ if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
+ timeouts[TCP_CONNTRACK_SYN_SENT] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
+ }
+
+ if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
+ timeouts[TCP_CONNTRACK_SYN_RECV] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
+ timeouts[TCP_CONNTRACK_ESTABLISHED] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
+ timeouts[TCP_CONNTRACK_FIN_WAIT] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
+ timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
+ timeouts[TCP_CONNTRACK_LAST_ACK] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
+ timeouts[TCP_CONNTRACK_TIME_WAIT] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
+ timeouts[TCP_CONNTRACK_CLOSE] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
+ timeouts[TCP_CONNTRACK_SYN_SENT2] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
+ timeouts[TCP_CONNTRACK_RETRANS] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
+ }
+ if (tb[CTA_TIMEOUT_TCP_UNACK]) {
+ timeouts[TCP_CONNTRACK_UNACK] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
+ }
+
+ timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
+ return 0;
+}
+
+static int
+tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeouts = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
+ htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
+ htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
+ htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
+ htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
+ htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
+ htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
+ htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
+ htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
+ htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
+ htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
+ htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
+ [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_tcp_init_net(struct net *net)
+{
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+ int i;
+
+ for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
+ tn->timeouts[i] = tcp_timeouts[i];
+
+ /* timeouts[0] is unused, make it the same as SYN_SENT so
+ * ->timeouts[0] contains the 'new' timeout, like udp or icmp.
+ */
+ tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
+
+ /* If it is set to zero, we disable picking up already established
+ * connections.
+ */
+ tn->tcp_loose = 1;
+
+ /* "Be conservative in what you do,
+ * be liberal in what you accept from others."
+ * If it's non-zero, we mark only out of window RST segments as INVALID.
+ */
+ tn->tcp_be_liberal = 0;
+
+ /* If it's non-zero, we turn off RST sequence number check */
+ tn->tcp_ignore_invalid_rst = 0;
+
+ /* Max number of retransmitted packets without receiving an (acceptable)
+ * ACK from the destination. If this number is reached, a shorter timer
+ * will be started.
+ */
+ tn->tcp_max_retrans = 3;
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ tn->offload_timeout = 30 * HZ;
+#endif
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
+{
+ .l4proto = IPPROTO_TCP,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ .print_conntrack = tcp_print_conntrack,
+#endif
+ .can_early_drop = tcp_can_early_drop,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .to_nlattr = tcp_to_nlattr,
+ .from_nlattr = nlattr_to_tcp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_tuple_size = tcp_nlattr_tuple_size,
+ .nlattr_size = TCP_NLATTR_SIZE,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_TCP_MAX,
+ .obj_size = sizeof(unsigned int) *
+ TCP_CONNTRACK_TIMEOUT_MAX,
+ .nla_policy = tcp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
new file mode 100644
index 000000000..3b516cffc
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+
+static const unsigned int udp_timeouts[UDP_CT_MAX] = {
+ [UDP_CT_UNREPLIED] = 30*HZ,
+ [UDP_CT_REPLIED] = 120*HZ,
+};
+
+static unsigned int *udp_get_timeouts(struct net *net)
+{
+ return nf_udp_pernet(net)->timeouts;
+}
+
+static void udp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg);
+}
+
+static bool udp_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ unsigned int udplen = skb->len - dataoff;
+ const struct udphdr *hdr;
+ struct udphdr _hdr;
+
+ /* Header is too small? */
+ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (!hdr) {
+ udp_error_log(skb, state, "short packet");
+ return true;
+ }
+
+ /* Truncated/malformed packets */
+ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+ udp_error_log(skb, state, "truncated/malformed packet");
+ return true;
+ }
+
+ /* Packet with no checksum */
+ if (!hdr->check)
+ return false;
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the checksum is assumed to be correct.
+ * FIXME: Source route IP option packets --RR */
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) {
+ udp_error_log(skb, state, "bad checksum");
+ return true;
+ }
+
+ return false;
+}
+
+/* Returns verdict for packet, and may modify conntrack type */
+int nf_conntrack_udp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ unsigned int *timeouts;
+
+ if (udp_error(skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = udp_get_timeouts(nf_ct_net(ct));
+
+ if (!nf_ct_is_confirmed(ct))
+ ct->proto.udp.stream_ts = 2 * HZ + jiffies;
+
+ /* If we've seen traffic both ways, this is some kind of UDP
+ * stream. Set Assured.
+ */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ unsigned long extra = timeouts[UDP_CT_UNREPLIED];
+ bool stream = false;
+
+ /* Still active after two seconds? Extend timeout. */
+ if (time_after(jiffies, ct->proto.udp.stream_ts)) {
+ extra = timeouts[UDP_CT_REPLIED];
+ stream = true;
+ }
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra);
+
+ /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+ if (unlikely((ct->status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
+ /* Also, more likely to be important, and not a probe */
+ if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ } else {
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
+ }
+ return NF_ACCEPT;
+}
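+/* Illustrative note (not part of the upstream code): the net effect of
+ * the stream_ts logic above is that a flow only earns the longer
+ * UDP_CT_REPLIED timeout (and the ASSURED bit) when replies are still
+ * flowing more than two seconds after the conntrack was created; short
+ * request/response exchanges keep the shorter UDP_CT_UNREPLIED timeout.
+ */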
+
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
+}
+
+static bool udplite_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ unsigned int udplen = skb->len - dataoff;
+ const struct udphdr *hdr;
+ struct udphdr _hdr;
+ unsigned int cscov;
+
+ /* Header is too small? */
+ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (!hdr) {
+ udplite_error_log(skb, state, "short packet");
+ return true;
+ }
+
+ cscov = ntohs(hdr->len);
+ if (cscov == 0) {
+ cscov = udplen;
+ } else if (cscov < sizeof(*hdr) || cscov > udplen) {
+ udplite_error_log(skb, state, "invalid checksum coverage");
+ return true;
+ }
+
+ /* UDPLITE mandates checksums */
+ if (!hdr->check) {
+ udplite_error_log(skb, state, "checksum missing");
+ return true;
+ }
+
+ /* Checksum invalid? Ignore. */
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
+ state->pf)) {
+ udplite_error_log(skb, state, "bad checksum");
+ return true;
+ }
+
+ return false;
+}
+
+/* Returns verdict for packet, and may modify conntrack type */
+int nf_conntrack_udplite_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ unsigned int *timeouts;
+
+ if (udplite_error(skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = udp_get_timeouts(nf_ct_net(ct));
+
+ /* If we've seen traffic both ways, this is some kind of UDP
+ stream. Extend timeout. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_refresh_acct(ct, ctinfo, skb,
+ timeouts[UDP_CT_REPLIED]);
+
+ if (unlikely((ct->status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
+ /* Also, more likely to be important, and not a probe */
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ } else {
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
+ }
+ return NF_ACCEPT;
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeouts = data;
+ struct nf_udp_net *un = nf_udp_pernet(net);
+
+ if (!timeouts)
+ timeouts = un->timeouts;
+
+ /* set default timeouts for UDP. */
+ timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
+ timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
+
+ if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) {
+ timeouts[UDP_CT_UNREPLIED] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ;
+ }
+ if (tb[CTA_TIMEOUT_UDP_REPLIED]) {
+ timeouts[UDP_CT_REPLIED] =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ;
+ }
+ return 0;
+}
+
+static int
+udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeouts = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED,
+ htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) ||
+ nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED,
+ htonl(timeouts[UDP_CT_REPLIED] / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = {
+ [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+void nf_conntrack_udp_init_net(struct net *net)
+{
+ struct nf_udp_net *un = nf_udp_pernet(net);
+ int i;
+
+ for (i = 0; i < UDP_CT_MAX; i++)
+ un->timeouts[i] = udp_timeouts[i];
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ un->offload_timeout = 30 * HZ;
+#endif
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = {
+ .l4proto = IPPROTO_UDP,
+ .allow_clash = true,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = udp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = udp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_UDP_MAX,
+ .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
+ .nla_policy = udp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
+
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = {
+ .l4proto = IPPROTO_UDPLITE,
+ .allow_clash = true,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ .ctnl_timeout = {
+ .nlattr_to_obj = udp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = udp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_UDP_MAX,
+ .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
+ .nla_policy = udp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+};
+#endif
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
new file mode 100644
index 000000000..13dc421fc
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* SANE connection tracking helper
+ * (SANE = Scanner Access Now Easy)
+ * For documentation about the SANE network protocol see
+ * http://www.sane-project.org/html/doc015.html
+ */
+
+/* Copyright (C) 2007 Red Hat, Inc.
+ * Author: Michal Schmidt <mschmidt@redhat.com>
+ * Based on the FTP conntrack helper (net/netfilter/nf_conntrack_ftp.c):
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sane.h>
+
+#define HELPER_NAME "sane"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
+MODULE_DESCRIPTION("SANE connection tracking helper");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+#define MAX_PORTS 8
+static u_int16_t ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+
+struct sane_request {
+ __be32 RPC_code;
+#define SANE_NET_START 7 /* RPC code */
+
+ __be32 handle;
+};
+
+struct sane_reply_net_start {
+ __be32 status;
+#define SANE_STATUS_SUCCESS 0
+
+ __be16 zero;
+ __be16 port;
+ /* other fields aren't interesting for conntrack */
+};
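+
+/* Wire sketch (illustrative): a SANE_NET_START exchange as seen by this
+ * helper. Request: RPC_code = htonl(7) plus a handle. Successful reply:
+ * status = htonl(0), a zero __be16, then the big-endian data port the
+ * server will listen on; that port is what the helper expects below.
+ */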
+
+static int help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ int ret = NF_ACCEPT;
+ int dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct);
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ struct sane_reply_net_start *reply;
+ union {
+ struct sane_request req;
+ struct sane_reply_net_start repl;
+ } buf;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+ /* Not a full tcp header? */
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ /* No data? */
+ dataoff = protoff + th->doff * 4;
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+ datalen = skb->len - dataoff;
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ const struct sane_request *req;
+
+ if (datalen != sizeof(struct sane_request))
+ return NF_ACCEPT;
+
+ req = skb_header_pointer(skb, dataoff, datalen, &buf.req);
+ if (!req)
+ return NF_ACCEPT;
+
+ if (req->RPC_code != htonl(SANE_NET_START)) {
+ /* Not an interesting command */
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
+ return NF_ACCEPT;
+ }
+
+ /* We're interested in the next reply */
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED);
+ return NF_ACCEPT;
+ }
+
+ /* IP_CT_DIR_REPLY */
+
+ /* Is it a reply to an uninteresting command? */
+ if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED)
+ return NF_ACCEPT;
+
+ /* It's a reply to SANE_NET_START. */
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
+
+ if (datalen < sizeof(struct sane_reply_net_start)) {
+ pr_debug("NET_START reply too short\n");
+ return NF_ACCEPT;
+ }
+
+ datalen = sizeof(struct sane_reply_net_start);
+
+ reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl);
+ if (!reply)
+ return NF_ACCEPT;
+
+ if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
+ /* saned refused the command */
+ pr_debug("unsuccessful SANE_STATUS = %u\n",
+ ntohl(reply->status));
+ return NF_ACCEPT;
+ }
+
+ /* Invalid saned reply? Ignore it. */
+ if (reply->zero != 0)
+ return NF_ACCEPT;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ nf_ct_helper_log(skb, ct, "cannot alloc expectation");
+ return NF_DROP;
+ }
+
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_TCP, NULL, &reply->port);
+
+ pr_debug("expect: ");
+ nf_ct_dump_tuple(&exp->tuple);
+
+ /* Can't expect this? Best to drop packet now. */
+ if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct, "cannot add expectation");
+ ret = NF_DROP;
+ }
+
+ nf_ct_expect_put(exp);
+ return ret;
+}
+
+static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly;
+
+static const struct nf_conntrack_expect_policy sane_exp_policy = {
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+static void __exit nf_conntrack_sane_fini(void)
+{
+ nf_conntrack_helpers_unregister(sane, ports_c * 2);
+}
+
+static int __init nf_conntrack_sane_init(void)
+{
+ int i, ret = 0;
+
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
+
+ if (ports_c == 0)
+ ports[ports_c++] = SANE_PORT;
+
+	/* FIXME: should be configurable whether IPv4 and IPv6 connections
+	 * are tracked or not - YK
+	 */
+ for (i = 0; i < ports_c; i++) {
+ nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP,
+ HELPER_NAME, SANE_PORT, ports[i], ports[i],
+ &sane_exp_policy, 0, help, NULL,
+ THIS_MODULE);
+ nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP,
+ HELPER_NAME, SANE_PORT, ports[i], ports[i],
+ &sane_exp_policy, 0, help, NULL,
+ THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(sane, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+module_init(nf_conntrack_sane_init);
+module_exit(nf_conntrack_sane_fini);
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 000000000..7ab2b25b5
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+
+int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ s32 off)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_conn_seqadj *seqadj;
+ struct nf_ct_seqadj *this_way;
+
+ if (off == 0)
+ return 0;
+
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+ seqadj = nfct_seqadj(ct);
+ this_way = &seqadj->seq[dir];
+ this_way->offset_before = off;
+ this_way->offset_after = off;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
+
+int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ __be32 seq, s32 off)
+{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_seqadj *this_way;
+
+ if (off == 0)
+ return 0;
+
+ if (unlikely(!seqadj)) {
+ WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
+ return 0;
+ }
+
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+ spin_lock_bh(&ct->lock);
+ this_way = &seqadj->seq[dir];
+ if (this_way->offset_before == this_way->offset_after ||
+ before(this_way->correction_pos, ntohl(seq))) {
+ this_way->correction_pos = ntohl(seq);
+ this_way->offset_before = this_way->offset_after;
+ this_way->offset_after += off;
+ }
+ spin_unlock_bh(&ct->lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
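+
+/* Worked example (sketch): if a NAT helper grows a payload by 4 bytes in
+ * a segment starting at sequence 1000, the call above records
+ * correction_pos = 1000, keeps the old offset in offset_before and sets
+ * offset_after += 4. Segments with seq > 1000 then get offset_after
+ * applied, while retransmissions of earlier data keep offset_before.
+ */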
+
+void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ s32 off)
+{
+ const struct tcphdr *th;
+
+ if (nf_ct_protonum(ct) != IPPROTO_TCP)
+ return;
+
+ th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb));
+ nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
+}
+EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
+
+/* Adjust one found SACK option including checksum correction */
+static void nf_ct_sack_block_adjust(struct sk_buff *skb,
+ struct tcphdr *tcph,
+ unsigned int sackoff,
+ unsigned int sackend,
+ struct nf_ct_seqadj *seq)
+{
+ while (sackoff < sackend) {
+ struct tcp_sack_block_wire *sack;
+ __be32 new_start_seq, new_end_seq;
+
+ sack = (void *)skb->data + sackoff;
+ if (after(ntohl(sack->start_seq) - seq->offset_before,
+ seq->correction_pos))
+ new_start_seq = htonl(ntohl(sack->start_seq) -
+ seq->offset_after);
+ else
+ new_start_seq = htonl(ntohl(sack->start_seq) -
+ seq->offset_before);
+
+ if (after(ntohl(sack->end_seq) - seq->offset_before,
+ seq->correction_pos))
+ new_end_seq = htonl(ntohl(sack->end_seq) -
+ seq->offset_after);
+ else
+ new_end_seq = htonl(ntohl(sack->end_seq) -
+ seq->offset_before);
+
+ pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n",
+ ntohl(sack->start_seq), ntohl(new_start_seq),
+ ntohl(sack->end_seq), ntohl(new_end_seq));
+
+ inet_proto_csum_replace4(&tcph->check, skb,
+ sack->start_seq, new_start_seq, false);
+ inet_proto_csum_replace4(&tcph->check, skb,
+ sack->end_seq, new_end_seq, false);
+ sack->start_seq = new_start_seq;
+ sack->end_seq = new_end_seq;
+ sackoff += sizeof(*sack);
+ }
+}
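+
+/* Worked example (sketch): suppose the reverse direction has
+ * correction_pos = 1000, offset_before = 0 and offset_after = +4. An
+ * incoming SACK block 1204-1304 satisfies the after() test above, so
+ * both edges are rewritten with offset_after, giving 1200-1300: the
+ * sequence numbers the peer originally sent.
+ */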
+
+/* TCP SACK sequence number adjustment */
+static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct tcphdr *tcph = (void *)skb->data + protoff;
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ unsigned int dir, optoff, optend;
+
+ optoff = protoff + sizeof(struct tcphdr);
+ optend = protoff + tcph->doff * 4;
+
+ if (skb_ensure_writable(skb, optend))
+ return 0;
+
+ tcph = (void *)skb->data + protoff;
+ dir = CTINFO2DIR(ctinfo);
+
+ while (optoff < optend) {
+ /* Usually: option, length. */
+ unsigned char *op = skb->data + optoff;
+
+ switch (op[0]) {
+ case TCPOPT_EOL:
+ return 1;
+ case TCPOPT_NOP:
+ optoff++;
+ continue;
+ default:
+ /* no partial options */
+ if (optoff + 1 == optend ||
+ optoff + op[1] > optend ||
+ op[1] < 2)
+ return 0;
+ if (op[0] == TCPOPT_SACK &&
+ op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+ ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+ nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
+ optoff+op[1],
+ &seqadj->seq[!dir]);
+ optoff += op[1];
+ }
+ }
+ return 1;
+}
+
+/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
+int nf_ct_seq_adjust(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct tcphdr *tcph;
+ __be32 newseq, newack;
+ s32 seqoff, ackoff;
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *this_way, *other_way;
+ int res = 1;
+
+ this_way = &seqadj->seq[dir];
+ other_way = &seqadj->seq[!dir];
+
+ if (skb_ensure_writable(skb, protoff + sizeof(*tcph)))
+ return 0;
+
+ tcph = (void *)skb->data + protoff;
+ spin_lock_bh(&ct->lock);
+ if (after(ntohl(tcph->seq), this_way->correction_pos))
+ seqoff = this_way->offset_after;
+ else
+ seqoff = this_way->offset_before;
+
+ newseq = htonl(ntohl(tcph->seq) + seqoff);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+ pr_debug("Adjusting sequence number from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq));
+ tcph->seq = newseq;
+
+ if (!tcph->ack)
+ goto out;
+
+ if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+ ackoff = other_way->offset_after;
+ else
+ ackoff = other_way->offset_before;
+
+ newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
+ false);
+ pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+ ntohl(newack));
+ tcph->ack_seq = newack;
+
+ res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo);
+out:
+ spin_unlock_bh(&ct->lock);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
+
+s32 nf_ct_seq_offset(const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ u32 seq)
+{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *this_way;
+
+ if (!seqadj)
+ return 0;
+
+ this_way = &seqadj->seq[dir];
+ return after(seq, this_way->correction_pos) ?
+ this_way->offset_after : this_way->offset_before;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
new file mode 100644
index 000000000..d0eac27f6
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -0,0 +1,1707 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* SIP extension for IP connection tracking.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_conntrack_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+#define HELPER_NAME "sip"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP connection tracking helper");
+MODULE_ALIAS("ip_conntrack_sip");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of SIP servers");
+
+static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT;
+module_param(sip_timeout, uint, 0600);
+MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
+
+static int sip_direct_signalling __read_mostly = 1;
+module_param(sip_direct_signalling, int, 0600);
+MODULE_PARM_DESC(sip_direct_signalling, "expect incoming calls from registrar "
+ "only (default 1)");
+
+static int sip_direct_media __read_mostly = 1;
+module_param(sip_direct_media, int, 0600);
+MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
+ "endpoints only (default 1)");
+
+static int sip_external_media __read_mostly = 0;
+module_param(sip_external_media, int, 0600);
+MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external "
+ "endpoints (default 0)");
+
+const struct nf_nat_sip_hooks __rcu *nf_nat_sip_hooks;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
+
+static int string_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ int len = 0;
+
+ while (dptr < limit && isalpha(*dptr)) {
+ dptr++;
+ len++;
+ }
+ return len;
+}
+
+static int digits_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ int len = 0;
+ while (dptr < limit && isdigit(*dptr)) {
+ dptr++;
+ len++;
+ }
+ return len;
+}
+
+static int iswordc(const char c)
+{
+ if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
+ (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' ||
+ c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
+ c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') ||
+ c == '\'')
+ return 1;
+ return 0;
+}
+
+static int word_len(const char *dptr, const char *limit)
+{
+ int len = 0;
+ while (dptr < limit && iswordc(*dptr)) {
+ dptr++;
+ len++;
+ }
+ return len;
+}
+
+static int callid_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ int len, domain_len;
+
+ len = word_len(dptr, limit);
+ dptr += len;
+ if (!len || dptr == limit || *dptr != '@')
+ return len;
+ dptr++;
+ len++;
+
+ domain_len = word_len(dptr, limit);
+ if (!domain_len)
+ return 0;
+ return len + domain_len;
+}
+
+/* get media type + port length */
+static int media_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ int len = string_len(ct, dptr, limit, shift);
+
+ dptr += len;
+ if (dptr >= limit || *dptr != ' ')
+ return 0;
+ len++;
+ dptr++;
+
+ return len + digits_len(ct, dptr, limit, shift);
+}
+
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit, bool delim)
+{
+ const char *end;
+ int ret;
+
+ if (!ct)
+ return 0;
+
+ memset(addr, 0, sizeof(*addr));
+ switch (nf_ct_l3num(ct)) {
+ case AF_INET:
+ ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ if (ret == 0)
+ return 0;
+ break;
+ case AF_INET6:
+ if (cp < limit && *cp == '[')
+ cp++;
+ else if (delim)
+ return 0;
+
+ ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ if (ret == 0)
+ return 0;
+
+ if (end < limit && *end == ']')
+ end++;
+ else if (delim)
+ return 0;
+ break;
+ default:
+ BUG();
+ }
+
+ if (endp)
+ *endp = end;
+ return 1;
+}
+
+/* Skip an IP address. Returns its length. */
+static int epaddr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ union nf_inet_addr addr;
+ const char *aux = dptr;
+
+ if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
+ pr_debug("ip: %s parse failed.!\n", dptr);
+ return 0;
+ }
+
+ /* Port number */
+ if (*dptr == ':') {
+ dptr++;
+ dptr += digits_len(ct, dptr, limit, shift);
+ }
+ return dptr - aux;
+}
+
+/* Get the address length, skipping any user info. */
+static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ const char *start = dptr;
+ int s = *shift;
+
+ /* Search for @, but stop at the end of the line.
+ * We are inside a sip: URI, so we don't need to worry about
+ * continuation lines. */
+ while (dptr < limit &&
+ *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
+ (*shift)++;
+ dptr++;
+ }
+
+ if (dptr < limit && *dptr == '@') {
+ dptr++;
+ (*shift)++;
+ } else {
+ dptr = start;
+ *shift = s;
+ }
+
+ return epaddr_len(ct, dptr, limit, shift);
+}
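+
+/* Illustrative input: in "sip:user@192.0.2.1:5060" the loop above walks
+ * past "user@" (growing *shift), then epaddr_len() measures
+ * "192.0.2.1:5060"; when no '@' is found the shift is rolled back and
+ * the whole token is parsed as an address.
+ */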
+
+/* Parse a SIP request line of the form:
+ *
+ * Request-Line = Method SP Request-URI SP SIP-Version CRLF
+ *
+ * and return the offset and length of the address contained in the Request-URI.
+ */
+int ct_sip_parse_request(const struct nf_conn *ct,
+ const char *dptr, unsigned int datalen,
+ unsigned int *matchoff, unsigned int *matchlen,
+ union nf_inet_addr *addr, __be16 *port)
+{
+ const char *start = dptr, *limit = dptr + datalen, *end;
+ unsigned int mlen;
+ unsigned int p;
+ int shift = 0;
+
+ /* Skip method and following whitespace */
+ mlen = string_len(ct, dptr, limit, NULL);
+ if (!mlen)
+ return 0;
+ dptr += mlen;
+ if (++dptr >= limit)
+ return 0;
+
+ /* Find SIP URI */
+ for (; dptr < limit - strlen("sip:"); dptr++) {
+ if (*dptr == '\r' || *dptr == '\n')
+ return -1;
+ if (strncasecmp(dptr, "sip:", strlen("sip:")) == 0) {
+ dptr += strlen("sip:");
+ break;
+ }
+ }
+ if (!skp_epaddr_len(ct, dptr, limit, &shift))
+ return 0;
+ dptr += shift;
+
+ if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
+ return -1;
+ if (end < limit && *end == ':') {
+ end++;
+ p = simple_strtoul(end, (char **)&end, 10);
+ if (p < 1024 || p > 65535)
+ return -1;
+ *port = htons(p);
+ } else
+ *port = htons(SIP_PORT);
+
+ if (end == dptr)
+ return 0;
+ *matchoff = dptr - start;
+ *matchlen = end - dptr;
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_request);
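+
+/* Worked example (sketch): for the request line
+ *	"INVITE sip:user@192.0.2.1:5060 SIP/2.0"
+ * this returns 1 with *addr = 192.0.2.1, *port = htons(5060), and
+ * matchoff/matchlen delimiting "192.0.2.1:5060" inside the Request-URI;
+ * a URI without an explicit port falls back to htons(SIP_PORT).
+ */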
+
+/* SIP header parsing: SIP headers are located at the beginning of a line, but
+ * may span several lines, in which case the continuation lines begin with a
+ * whitespace character. RFC 2543 allows lines to be terminated with CR,
+ * LF or CRLF; RFC 3261 allows only CRLF. We support both.
+ *
+ * Headers are followed by optional whitespace, a colon, more optional
+ * whitespace and the value. Whitespace in this context means any amount of
+ * tabs, spaces and continuation lines, which are treated as a single whitespace
+ * character.
+ *
+ * Some headers may appear multiple times. A comma separated list of values is
+ * equivalent to multiple headers.
+ */
+static const struct sip_header ct_sip_hdrs[] = {
+ [SIP_HDR_CSEQ] = SIP_HDR("CSeq", NULL, NULL, digits_len),
+ [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len),
+ [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len),
+ [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len),
+ [SIP_HDR_VIA_UDP] = SIP_HDR("Via", "v", "UDP ", epaddr_len),
+ [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len),
+ [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
+ [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
+ [SIP_HDR_CALL_ID] = SIP_HDR("Call-Id", "i", NULL, callid_len),
+};
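+
+/* Illustrative input: the folded header
+ *	"Via: SIP/2.0/UDP 192.0.2.1\r\n ;branch=z9hG4bK776"
+ * continues on the next line with leading whitespace; the helpers below
+ * treat the fold as a single whitespace character, and a compact name
+ * such as "v:" matches the same table entry as the long form.
+ */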
+
+static const char *sip_follow_continuation(const char *dptr, const char *limit)
+{
+ /* Walk past newline */
+ if (++dptr >= limit)
+ return NULL;
+
+ /* Skip '\n' in CR LF */
+ if (*(dptr - 1) == '\r' && *dptr == '\n') {
+ if (++dptr >= limit)
+ return NULL;
+ }
+
+ /* Continuation line? */
+ if (*dptr != ' ' && *dptr != '\t')
+ return NULL;
+
+ /* skip leading whitespace */
+ for (; dptr < limit; dptr++) {
+ if (*dptr != ' ' && *dptr != '\t')
+ break;
+ }
+ return dptr;
+}
+
+static const char *sip_skip_whitespace(const char *dptr, const char *limit)
+{
+ for (; dptr < limit; dptr++) {
+ if (*dptr == ' ' || *dptr == '\t')
+ continue;
+ if (*dptr != '\r' && *dptr != '\n')
+ break;
+ dptr = sip_follow_continuation(dptr, limit);
+ break;
+ }
+ return dptr;
+}
+
+/* Search within a SIP header value, dealing with continuation lines */
+static const char *ct_sip_header_search(const char *dptr, const char *limit,
+ const char *needle, unsigned int len)
+{
+ for (limit -= len; dptr < limit; dptr++) {
+ if (*dptr == '\r' || *dptr == '\n') {
+ dptr = sip_follow_continuation(dptr, limit);
+ if (dptr == NULL)
+ break;
+ continue;
+ }
+
+ if (strncasecmp(dptr, needle, len) == 0)
+ return dptr;
+ }
+ return NULL;
+}
+
+int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sip_header_types type,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const struct sip_header *hdr = &ct_sip_hdrs[type];
+ const char *start = dptr, *limit = dptr + datalen;
+ int shift = 0;
+
+ for (dptr += dataoff; dptr < limit; dptr++) {
+ /* Find beginning of line */
+ if (*dptr != '\r' && *dptr != '\n')
+ continue;
+ if (++dptr >= limit)
+ break;
+ if (*(dptr - 1) == '\r' && *dptr == '\n') {
+ if (++dptr >= limit)
+ break;
+ }
+
+ /* Skip continuation lines */
+ if (*dptr == ' ' || *dptr == '\t')
+ continue;
+
+ /* Find header. Compact headers must be followed by a
+ * non-alphabetic character to avoid mismatches. */
+ if (limit - dptr >= hdr->len &&
+ strncasecmp(dptr, hdr->name, hdr->len) == 0)
+ dptr += hdr->len;
+ else if (hdr->cname && limit - dptr >= hdr->clen + 1 &&
+ strncasecmp(dptr, hdr->cname, hdr->clen) == 0 &&
+ !isalpha(*(dptr + hdr->clen)))
+ dptr += hdr->clen;
+ else
+ continue;
+
+ /* Find and skip colon */
+ dptr = sip_skip_whitespace(dptr, limit);
+ if (dptr == NULL)
+ break;
+ if (*dptr != ':' || ++dptr >= limit)
+ break;
+
+ /* Skip whitespace after colon */
+ dptr = sip_skip_whitespace(dptr, limit);
+ if (dptr == NULL)
+ break;
+
+ *matchoff = dptr - start;
+ if (hdr->search) {
+ dptr = ct_sip_header_search(dptr, limit, hdr->search,
+ hdr->slen);
+ if (!dptr)
+ return -1;
+ dptr += hdr->slen;
+ }
+
+ *matchlen = hdr->match_len(ct, dptr, limit, &shift);
+ if (!*matchlen)
+ return -1;
+ *matchoff = dptr - start + shift;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ct_sip_get_header);
+
+/* Get next header field in a list of comma separated values */
+static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sip_header_types type,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const struct sip_header *hdr = &ct_sip_hdrs[type];
+ const char *start = dptr, *limit = dptr + datalen;
+ int shift = 0;
+
+ dptr += dataoff;
+
+ dptr = ct_sip_header_search(dptr, limit, ",", strlen(","));
+ if (!dptr)
+ return 0;
+
+ dptr = ct_sip_header_search(dptr, limit, hdr->search, hdr->slen);
+ if (!dptr)
+ return 0;
+ dptr += hdr->slen;
+
+ *matchoff = dptr - start;
+ *matchlen = hdr->match_len(ct, dptr, limit, &shift);
+ if (!*matchlen)
+ return -1;
+ *matchoff += shift;
+ return 1;
+}
+
+/* Walk through headers until a parsable one is found or no header of the
+ * given type is left. */
+static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sip_header_types type, int *in_header,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ int ret;
+
+ if (in_header && *in_header) {
+ while (1) {
+ ret = ct_sip_next_header(ct, dptr, dataoff, datalen,
+ type, matchoff, matchlen);
+ if (ret > 0)
+ return ret;
+ if (ret == 0)
+ break;
+ dataoff = *matchoff;
+ }
+ *in_header = 0;
+ }
+
+ while (1) {
+ ret = ct_sip_get_header(ct, dptr, dataoff, datalen,
+ type, matchoff, matchlen);
+ if (ret > 0)
+ break;
+ if (ret == 0)
+ return ret;
+ dataoff = *matchoff;
+ }
+
+ if (in_header)
+ *in_header = 1;
+ return 1;
+}
+
+/* Locate a SIP header, parse the URI and return the offset and length of
+ * the address as well as the address and port themselves. A stream of
+ * headers can be parsed by handing in a non-NULL datalen and in_header
+ * pointer.
+ */
+int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
+ unsigned int *dataoff, unsigned int datalen,
+ enum sip_header_types type, int *in_header,
+ unsigned int *matchoff, unsigned int *matchlen,
+ union nf_inet_addr *addr, __be16 *port)
+{
+ const char *c, *limit = dptr + datalen;
+ unsigned int p;
+ int ret;
+
+ ret = ct_sip_walk_headers(ct, dptr, dataoff ? *dataoff : 0, datalen,
+ type, in_header, matchoff, matchlen);
+ WARN_ON(ret < 0);
+ if (ret == 0)
+ return ret;
+
+ if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
+ return -1;
+ if (*c == ':') {
+ c++;
+ p = simple_strtoul(c, (char **)&c, 10);
+ if (p < 1024 || p > 65535)
+ return -1;
+ *port = htons(p);
+ } else
+ *port = htons(SIP_PORT);
+
+ if (dataoff)
+ *dataoff = c - dptr;
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri);
+
+static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const char *limit = dptr + datalen;
+ const char *start;
+ const char *end;
+
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+ start += strlen(name);
+
+ end = ct_sip_header_search(start, limit, ";", strlen(";"));
+ if (!end)
+ end = limit;
+
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ return 1;
+}
+
+/* Parse address from header parameter and return address, offset and length */
+int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen,
+ union nf_inet_addr *addr, bool delim)
+{
+ const char *limit = dptr + datalen;
+ const char *start, *end;
+
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+
+ start += strlen(name);
+ if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
+ return 0;
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_address_param);
+
+/* Parse numerical header parameter and return value, offset and length */
+int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen,
+ unsigned int *val)
+{
+ const char *limit = dptr + datalen;
+ const char *start;
+ char *end;
+
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+
+ start += strlen(name);
+ *val = simple_strtoul(start, &end, 0);
+ if (start == end)
+ return -1;
+ if (matchoff && matchlen) {
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param);
+
+static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ u8 *proto)
+{
+ unsigned int matchoff, matchlen;
+
+ if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=",
+ &matchoff, &matchlen)) {
+ if (!strncasecmp(dptr + matchoff, "TCP", strlen("TCP")))
+ *proto = IPPROTO_TCP;
+ else if (!strncasecmp(dptr + matchoff, "UDP", strlen("UDP")))
+ *proto = IPPROTO_UDP;
+ else
+ return 0;
+
+ if (*proto != nf_ct_protonum(ct))
+ return 0;
+ } else
+ *proto = nf_ct_protonum(ct);
+
+ return 1;
+}
+
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit)
+{
+ const char *end;
+ int ret;
+
+ memset(addr, 0, sizeof(*addr));
+ switch (nf_ct_l3num(ct)) {
+ case AF_INET:
+ ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ break;
+ case AF_INET6:
+ ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ break;
+ default:
+ BUG();
+ }
+
+ if (ret == 0)
+ return 0;
+ if (endp)
+ *endp = end;
+ return 1;
+}
+
+/* Skip an IP address. Returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ union nf_inet_addr addr;
+ const char *aux = dptr;
+
+ if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+ pr_debug("ip: %s parse failed.!\n", dptr);
+ return 0;
+ }
+
+ return dptr - aux;
+}
+
+/* SDP header parsing: an SDP session description contains an ordered set of
+ * headers, starting with a section containing general session parameters,
+ * optionally followed by multiple media descriptions.
+ *
+ * SDP headers always start at the beginning of a line. According to RFC 2327:
+ * "The sequence CRLF (0x0d0a) is used to end a record, although parsers should
+ * be tolerant and also accept records terminated with a single newline
+ * character". We handle both cases.
+ */
+static const struct sip_header ct_sdp_hdrs_v4[] = {
+ [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
+ [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
+};
+
+static const struct sip_header ct_sdp_hdrs_v6[] = {
+ [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
+ [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
+ [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
+};
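+
+/* Illustrative session description (sketch):
+ *	v=0
+ *	o=- 1 1 IN IP4 192.0.2.1
+ *	c=IN IP4 192.0.2.1
+ *	m=audio 49170 RTP/AVP 0
+ * SDP_HDR_CONNECTION yields the address after "IN IP4 " via
+ * sdp_addr_len(), and SDP_HDR_MEDIA yields the type "audio " plus the
+ * port 49170 via media_len().
+ */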
+
+/* Linear string search within SDP header values */
+static const char *ct_sdp_header_search(const char *dptr, const char *limit,
+ const char *needle, unsigned int len)
+{
+ for (limit -= len; dptr < limit; dptr++) {
+ if (*dptr == '\r' || *dptr == '\n')
+ break;
+ if (strncmp(dptr, needle, len) == 0)
+ return dptr;
+ }
+ return NULL;
+}
+
+/* Locate a SDP header (optionally a substring within the header value),
+ * optionally stopping at the first occurrence of the term header, parse
+ * it and return the offset and length of the data we're interested in.
+ */
+int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const struct sip_header *hdrs, *hdr, *thdr;
+ const char *start = dptr, *limit = dptr + datalen;
+ int shift = 0;
+
+ hdrs = nf_ct_l3num(ct) == NFPROTO_IPV4 ? ct_sdp_hdrs_v4 : ct_sdp_hdrs_v6;
+ hdr = &hdrs[type];
+ thdr = &hdrs[term];
+
+ for (dptr += dataoff; dptr < limit; dptr++) {
+ /* Find beginning of line */
+ if (*dptr != '\r' && *dptr != '\n')
+ continue;
+ if (++dptr >= limit)
+ break;
+ if (*(dptr - 1) == '\r' && *dptr == '\n') {
+ if (++dptr >= limit)
+ break;
+ }
+
+ if (term != SDP_HDR_UNSPEC &&
+ limit - dptr >= thdr->len &&
+ strncasecmp(dptr, thdr->name, thdr->len) == 0)
+ break;
+ else if (limit - dptr >= hdr->len &&
+ strncasecmp(dptr, hdr->name, hdr->len) == 0)
+ dptr += hdr->len;
+ else
+ continue;
+
+ *matchoff = dptr - start;
+ if (hdr->search) {
+ dptr = ct_sdp_header_search(dptr, limit, hdr->search,
+ hdr->slen);
+ if (!dptr)
+ return -1;
+ dptr += hdr->slen;
+ }
+
+ *matchlen = hdr->match_len(ct, dptr, limit, &shift);
+ if (!*matchlen)
+ return -1;
+ *matchoff = dptr - start + shift;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ct_sip_get_sdp_header);
+
+static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ unsigned int *matchoff, unsigned int *matchlen,
+ union nf_inet_addr *addr)
+{
+ int ret;
+
+ ret = ct_sip_get_sdp_header(ct, dptr, dataoff, datalen, type, term,
+ matchoff, matchlen);
+ if (ret <= 0)
+ return ret;
+
+ if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+ dptr + *matchoff + *matchlen))
+ return -1;
+ return 1;
+}
+
+static int refresh_signalling_expectation(struct nf_conn *ct,
+ union nf_inet_addr *addr,
+ u8 proto, __be16 port,
+ unsigned int expires)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *next;
+ int found = 0;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
+ if (exp->class != SIP_EXPECT_SIGNALLING ||
+ !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
+ exp->tuple.dst.protonum != proto ||
+ exp->tuple.dst.u.udp.port != port)
+ continue;
+ if (mod_timer_pending(&exp->timeout, jiffies + expires * HZ)) {
+ exp->flags &= ~NF_CT_EXPECT_INACTIVE;
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ return found;
+}
+
+static void flush_expectations(struct nf_conn *ct, bool media)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *next;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
+ if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
+ continue;
+ if (!nf_ct_remove_expect(exp))
+ continue;
+ if (!media)
+ break;
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ union nf_inet_addr *daddr, __be16 port,
+ enum sip_expectation_classes class,
+ unsigned int mediaoff, unsigned int medialen)
+{
+ struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct net *net = nf_ct_net(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ union nf_inet_addr *saddr;
+ struct nf_conntrack_tuple tuple;
+ int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
+ u_int16_t base_port;
+ __be16 rtp_port, rtcp_port;
+ const struct nf_nat_sip_hooks *hooks;
+
+ saddr = NULL;
+ if (sip_direct_media) {
+ if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3))
+ return NF_ACCEPT;
+ saddr = &ct->tuplehash[!dir].tuple.src.u3;
+ } else if (sip_external_media) {
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(dev);
+ struct flowi fl;
+ struct dst_entry *dst = NULL;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = daddr->ip;
+ nf_ip_route(net, &dst, &fl, false);
+ break;
+
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = daddr->in6;
+ nf_ip6_route(net, &dst, &fl, false);
+ break;
+ }
+
+ /* Don't predict any conntracks when media endpoint is reachable
+ * through the same interface as the signalling peer.
+ */
+ if (dst) {
+ bool external_media = (dst->dev == dev);
+
+ dst_release(dst);
+ if (external_media)
+ return NF_ACCEPT;
+ }
+ }
+
+ /* We need to check whether the registration exists before attempting
+ * to register it since we can see the same media description multiple
+ * times on different connections in case multiple endpoints receive
+ * the same call.
+ *
+ * RTP optimization: if we find a matching media channel expectation
+ * and both the expectation and this connection are SNATed, we assume
+ * both sides can reach each other directly and use the final
+ * destination address from the expectation. We still need to keep
+ * the NATed expectations for media that might arrive from the
+ * outside, and additionally need to expect the direct RTP stream
+ * in case it passes through us even without NAT.
+ */
+ memset(&tuple, 0, sizeof(tuple));
+ if (saddr)
+ tuple.src.u3 = *saddr;
+ tuple.src.l3num = nf_ct_l3num(ct);
+ tuple.dst.protonum = IPPROTO_UDP;
+ tuple.dst.u3 = *daddr;
+ tuple.dst.u.udp.port = port;
+
+ do {
+ exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
+
+ if (!exp || exp->master == ct ||
+ nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
+ exp->class != class)
+ break;
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (!direct_rtp &&
+ (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) ||
+ exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
+ ct->status & IPS_NAT_MASK) {
+ *daddr = exp->saved_addr;
+ tuple.dst.u3 = exp->saved_addr;
+ tuple.dst.u.udp.port = exp->saved_proto.udp.port;
+ direct_rtp = 1;
+ } else
+#endif
+ skip_expect = 1;
+ } while (!skip_expect);
+
+ base_port = ntohs(tuple.dst.u.udp.port) & ~1;
+ rtp_port = htons(base_port);
+ rtcp_port = htons(base_port + 1);
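+	/* Pairing example (sketch): RTP conventionally uses the even port
+	 * and RTCP the next odd one, so a media port of 49171 yields
+	 * rtp_port 49170 and rtcp_port 49171; masking with ~1 above
+	 * realigns odd ports to that convention.
+	 */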
+
+ if (direct_rtp) {
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks &&
+ !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen,
+ mediaoff, medialen, ntohs(rtp_port)))
+ goto err1;
+ }
+
+ if (skip_expect)
+ return NF_ACCEPT;
+
+ rtp_exp = nf_ct_expect_alloc(ct);
+ if (rtp_exp == NULL)
+ goto err1;
+ nf_ct_expect_init(rtp_exp, class, nf_ct_l3num(ct), saddr, daddr,
+ IPPROTO_UDP, NULL, &rtp_port);
+
+ rtcp_exp = nf_ct_expect_alloc(ct);
+ if (rtcp_exp == NULL)
+ goto err2;
+ nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
+ IPPROTO_UDP, NULL, &rtcp_port);
+
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp)
+ ret = hooks->sdp_media(skb, protoff, dataoff, dptr,
+ datalen, rtp_exp, rtcp_exp,
+ mediaoff, medialen, daddr);
+ else {
+		/* The -EALREADY handling works around endpoints that send
+		 * SDP messages with an identical port but a different media
+		 * type; in that case we pretend the expectation was set up.
+		 * It also covers SDP messages sent with identical expect
+		 * tuples but for different master conntracks.
+ */
+ int errp = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
+
+ if (errp == 0 || errp == -EALREADY) {
+ int errcp = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
+
+ if (errcp == 0 || errcp == -EALREADY)
+ ret = NF_ACCEPT;
+ else if (errp == 0)
+ nf_ct_unexpect_related(rtp_exp);
+ }
+ }
+ nf_ct_expect_put(rtcp_exp);
+err2:
+ nf_ct_expect_put(rtp_exp);
+err1:
+ return ret;
+}
+
+static const struct sdp_media_type sdp_media_types[] = {
+ SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO),
+ SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO),
+ SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE),
+};
+
+static const struct sdp_media_type *sdp_media_type(const char *dptr,
+ unsigned int matchoff,
+ unsigned int matchlen)
+{
+ const struct sdp_media_type *t;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(sdp_media_types); i++) {
+ t = &sdp_media_types[i];
+ if (matchlen < t->len ||
+ strncmp(dptr + matchoff, t->name, t->len))
+ continue;
+ return t;
+ }
+ return NULL;
+}
+
+static int process_sdp(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchoff, matchlen;
+ unsigned int mediaoff, medialen;
+ unsigned int sdpoff;
+ unsigned int caddr_len, maddr_len;
+ unsigned int i;
+ union nf_inet_addr caddr, maddr, rtp_addr;
+ const struct nf_nat_sip_hooks *hooks;
+ unsigned int port;
+ const struct sdp_media_type *t;
+ int ret = NF_ACCEPT;
+
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+
+ /* Find beginning of session description */
+ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+ SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+ &matchoff, &matchlen) <= 0)
+ return NF_ACCEPT;
+ sdpoff = matchoff;
+
+ /* The connection information is contained in the session description
+ * and/or once per media description. The first media description marks
+ * the end of the session description. */
+ caddr_len = 0;
+ if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+ &matchoff, &matchlen, &caddr) > 0)
+ caddr_len = matchlen;
+
+ mediaoff = sdpoff;
+ for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
+ if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen,
+ SDP_HDR_MEDIA, SDP_HDR_UNSPEC,
+ &mediaoff, &medialen) <= 0)
+ break;
+
+ /* Get media type and port number. A media port value of zero
+ * indicates an inactive stream. */
+ t = sdp_media_type(*dptr, mediaoff, medialen);
+ if (!t) {
+ mediaoff += medialen;
+ continue;
+ }
+ mediaoff += t->len;
+ medialen -= t->len;
+
+ port = simple_strtoul(*dptr + mediaoff, NULL, 10);
+ if (port == 0)
+ continue;
+ if (port < 1024 || port > 65535) {
+ nf_ct_helper_log(skb, ct, "wrong port %u", port);
+ return NF_DROP;
+ }
+
+ /* The media description overrides the session description. */
+ maddr_len = 0;
+ if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+ &matchoff, &matchlen, &maddr) > 0) {
+ maddr_len = matchlen;
+ memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
+ } else if (caddr_len)
+ memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
+ else {
+ nf_ct_helper_log(skb, ct, "cannot parse SDP message");
+ return NF_DROP;
+ }
+
+ ret = set_expected_rtp_rtcp(skb, protoff, dataoff,
+ dptr, datalen,
+ &rtp_addr, htons(port), t->class,
+ mediaoff, medialen);
+ if (ret != NF_ACCEPT) {
+ nf_ct_helper_log(skb, ct,
+ "cannot add expectation for voice");
+ return ret;
+ }
+
+ /* Update media connection address if present */
+ if (maddr_len && hooks && ct->status & IPS_NAT_MASK) {
+ ret = hooks->sdp_addr(skb, protoff, dataoff,
+ dptr, datalen, mediaoff,
+ SDP_HDR_CONNECTION,
+ SDP_HDR_MEDIA,
+ &rtp_addr);
+ if (ret != NF_ACCEPT) {
+ nf_ct_helper_log(skb, ct, "cannot mangle SDP");
+ return ret;
+ }
+ }
+ i++;
+ }
+
+ /* Update session connection and owner addresses */
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK)
+ ret = hooks->sdp_session(skb, protoff, dataoff,
+ dptr, datalen, sdpoff,
+ &rtp_addr);
+
+ return ret;
+}
+
+static int process_invite_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq, unsigned int code)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+
+ if ((code >= 100 && code <= 199) ||
+ (code >= 200 && code <= 299))
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
+ else if (ct_sip_info->invite_cseq == cseq)
+ flush_expectations(ct, true);
+ return NF_ACCEPT;
+}
+
+static int process_update_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq, unsigned int code)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+
+ if ((code >= 100 && code <= 199) ||
+ (code >= 200 && code <= 299))
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
+ else if (ct_sip_info->invite_cseq == cseq)
+ flush_expectations(ct, true);
+ return NF_ACCEPT;
+}
+
+static int process_prack_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq, unsigned int code)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+
+ if ((code >= 100 && code <= 199) ||
+ (code >= 200 && code <= 299))
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
+ else if (ct_sip_info->invite_cseq == cseq)
+ flush_expectations(ct, true);
+ return NF_ACCEPT;
+}
+
+static int process_invite_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ unsigned int ret;
+
+ flush_expectations(ct, true);
+ ret = process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
+ if (ret == NF_ACCEPT)
+ ct_sip_info->invite_cseq = cseq;
+ return ret;
+}
+
+static int process_bye_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ flush_expectations(ct, true);
+ return NF_ACCEPT;
+}
+
+/* Parse a REGISTER request and create a permanent expectation for incoming
+ * signalling connections. The expectation is marked inactive and is activated
+ * when receiving a response indicating success from the registrar.
+ */
+static int process_register_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int matchoff, matchlen;
+ struct nf_conntrack_expect *exp;
+ union nf_inet_addr *saddr, daddr;
+ const struct nf_nat_sip_hooks *hooks;
+ struct nf_conntrack_helper *helper;
+ __be16 port;
+ u8 proto;
+ unsigned int expires = 0;
+ int ret;
+
+	/* Expected connections cannot register again. */
+ if (ct->status & IPS_EXPECTED)
+ return NF_ACCEPT;
+
+ /* We must check the expiration time: a value of zero signals the
+ * registrar to release the binding. We'll remove our expectation
+ * when receiving the new bindings in the response, but we don't
+ * want to create new ones.
+ *
+	 * The expiration time may be contained in an Expires: header, in
+	 * the Contact: header parameters or in the URI parameters.
+ */
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES,
+ &matchoff, &matchlen) > 0)
+ expires = simple_strtoul(*dptr + matchoff, NULL, 10);
+
+ ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ SIP_HDR_CONTACT, NULL,
+ &matchoff, &matchlen, &daddr, &port);
+ if (ret < 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse contact");
+ return NF_DROP;
+ } else if (ret == 0)
+ return NF_ACCEPT;
+
+ /* We don't support third-party registrations */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr))
+ return NF_ACCEPT;
+
+ if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen,
+ &proto) == 0)
+ return NF_ACCEPT;
+
+ if (ct_sip_parse_numerical_param(ct, *dptr,
+ matchoff + matchlen, *datalen,
+ "expires=", NULL, NULL, &expires) < 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse expires");
+ return NF_DROP;
+ }
+
+ if (expires == 0) {
+ ret = NF_ACCEPT;
+ goto store_cseq;
+ }
+
+ exp = nf_ct_expect_alloc(ct);
+ if (!exp) {
+ nf_ct_helper_log(skb, ct, "cannot alloc expectation");
+ return NF_DROP;
+ }
+
+ saddr = NULL;
+ if (sip_direct_signalling)
+ saddr = &ct->tuplehash[!dir].tuple.src.u3;
+
+ helper = rcu_dereference(nfct_help(ct)->helper);
+ if (!helper)
+ return NF_DROP;
+
+ nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
+ saddr, &daddr, proto, NULL, &port);
+ exp->timeout.expires = sip_timeout * HZ;
+ exp->helper = helper;
+ exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
+
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK)
+ ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
+ exp, matchoff, matchlen);
+ else {
+ if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct, "cannot add expectation");
+ ret = NF_DROP;
+ } else
+ ret = NF_ACCEPT;
+ }
+ nf_ct_expect_put(exp);
+
+store_cseq:
+ if (ret == NF_ACCEPT)
+ ct_sip_info->register_cseq = cseq;
+ return ret;
+}
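+
+/* Illustrative flow (sketch): a REGISTER carrying
+ *	"Contact: <sip:user@192.0.2.2:5060>;expires=3600"
+ * creates a permanent but inactive signalling expectation towards port
+ * 5060 above; the matching 2xx response handled below refreshes or
+ * activates it, while expires=0 skips straight to storing the CSeq.
+ */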
+
+static int process_register_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq, unsigned int code)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ union nf_inet_addr addr;
+ __be16 port;
+ u8 proto;
+ unsigned int matchoff, matchlen, coff = 0;
+ unsigned int expires = 0;
+ int in_contact = 0, ret;
+
+ /* According to RFC 3261, "UAs MUST NOT send a new registration until
+ * they have received a final response from the registrar for the
+ * previous one or the previous REGISTER request has timed out".
+ *
+ * However, some servers fail to detect retransmissions and send late
+ * responses, so we store the sequence number of the last valid
+ * request and compare it here.
+ */
+ if (ct_sip_info->register_cseq != cseq)
+ return NF_ACCEPT;
+
+ if (code >= 100 && code <= 199)
+ return NF_ACCEPT;
+ if (code < 200 || code > 299)
+ goto flush;
+
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES,
+ &matchoff, &matchlen) > 0)
+ expires = simple_strtoul(*dptr + matchoff, NULL, 10);
+
+ while (1) {
+ unsigned int c_expires = expires;
+
+ ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
+ SIP_HDR_CONTACT, &in_contact,
+ &matchoff, &matchlen,
+ &addr, &port);
+ if (ret < 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse contact");
+ return NF_DROP;
+ } else if (ret == 0)
+ break;
+
+ /* We don't support third-party registrations */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr))
+ continue;
+
+ if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen,
+ *datalen, &proto) == 0)
+ continue;
+
+ ret = ct_sip_parse_numerical_param(ct, *dptr,
+ matchoff + matchlen,
+ *datalen, "expires=",
+ NULL, NULL, &c_expires);
+ if (ret < 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse expires");
+ return NF_DROP;
+ }
+ if (c_expires == 0)
+ break;
+ if (refresh_signalling_expectation(ct, &addr, proto, port,
+ c_expires))
+ return NF_ACCEPT;
+ }
+
+flush:
+ flush_expectations(ct, false);
+ return NF_ACCEPT;
+}
+
+static const struct sip_handler sip_handlers[] = {
+ SIP_HANDLER("INVITE", process_invite_request, process_invite_response),
+ SIP_HANDLER("UPDATE", process_sdp, process_update_response),
+ SIP_HANDLER("ACK", process_sdp, NULL),
+ SIP_HANDLER("PRACK", process_sdp, process_prack_response),
+ SIP_HANDLER("BYE", process_bye_request, NULL),
+ SIP_HANDLER("REGISTER", process_register_request, process_register_response),
+};
+
+static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchoff, matchlen, matchend;
+ unsigned int code, cseq, i;
+
+ if (*datalen < strlen("SIP/2.0 200"))
+ return NF_ACCEPT;
+ code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10);
+ if (!code) {
+ nf_ct_helper_log(skb, ct, "cannot get code");
+ return NF_DROP;
+ }
+
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
+ &matchoff, &matchlen) <= 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse cseq");
+ return NF_DROP;
+ }
+ cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
+ if (!cseq && *(*dptr + matchoff) != '0') {
+ nf_ct_helper_log(skb, ct, "cannot get cseq");
+ return NF_DROP;
+ }
+ matchend = matchoff + matchlen + 1;
+
+ for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
+ handler = &sip_handlers[i];
+ if (handler->response == NULL)
+ continue;
+ if (*datalen < matchend + handler->len ||
+ strncasecmp(*dptr + matchend, handler->method, handler->len))
+ continue;
+ return handler->response(skb, protoff, dataoff, dptr, datalen,
+ cseq, code);
+ }
+ return NF_ACCEPT;
+}
+
+static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int matchoff, matchlen;
+ unsigned int cseq, i;
+ union nf_inet_addr addr;
+ __be16 port;
+
+ /* Many Cisco IP phones use a high source port for SIP requests, but
+ * listen for the response on port 5060. If we are the local
+ * router for one of these phones, save the port number from the
+ * Via: header so that nf_nat_sip can redirect the responses to
+ * the correct port.
+ */
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ SIP_HDR_VIA_UDP, NULL, &matchoff,
+ &matchlen, &addr, &port) > 0 &&
+ port != ct->tuplehash[dir].tuple.src.u.udp.port &&
+ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3))
+ ct_sip_info->forced_dport = port;
+
+ for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
+ handler = &sip_handlers[i];
+ if (handler->request == NULL)
+ continue;
+ if (*datalen < handler->len + 2 ||
+ strncasecmp(*dptr, handler->method, handler->len))
+ continue;
+ if ((*dptr)[handler->len] != ' ' ||
+ !isalpha((*dptr)[handler->len+1]))
+ continue;
+
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
+ &matchoff, &matchlen) <= 0) {
+ nf_ct_helper_log(skb, ct, "cannot parse cseq");
+ return NF_DROP;
+ }
+ cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
+ if (!cseq && *(*dptr + matchoff) != '0') {
+ nf_ct_helper_log(skb, ct, "cannot get cseq");
+ return NF_DROP;
+ }
+
+ return handler->request(skb, protoff, dataoff, dptr, datalen,
+ cseq);
+ }
+ return NF_ACCEPT;
+}
+
+static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
+ unsigned int protoff, unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
+{
+ const struct nf_nat_sip_hooks *hooks;
+ int ret;
+
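+	/* Responses start with the "SIP/2.0 " version string; anything else
+	 * is a request, which begins with the method name.
+	 */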
+ if (strncasecmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
+ ret = process_sip_request(skb, protoff, dataoff, dptr, datalen);
+ else
+ ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
+
+ if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && !hooks->msg(skb, protoff, dataoff,
+ dptr, datalen)) {
+ nf_ct_helper_log(skb, ct, "cannot NAT SIP message");
+ ret = NF_DROP;
+ }
+ }
+
+ return ret;
+}
+
+static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ struct tcphdr *th, _tcph;
+ unsigned int dataoff, datalen;
+ unsigned int matchoff, matchlen, clen;
+ unsigned int msglen, origlen;
+ const char *dptr, *end;
+ s16 diff, tdiff = 0;
+ int ret = NF_ACCEPT;
+ bool term;
+
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return NF_ACCEPT;
+
+	/* No data? */
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+ dataoff = protoff + th->doff * 4;
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+ nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+ if (unlikely(skb_linearize(skb)))
+ return NF_DROP;
+
+ dptr = skb->data + dataoff;
+ datalen = skb->len - dataoff;
+ if (datalen < strlen("SIP/2.0 200"))
+ return NF_ACCEPT;
+
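+	/* A single TCP segment may carry several SIP messages: use the
+	 * Content-Length header plus the "\r\n\r\n" header terminator to
+	 * find the end of each message before processing the next one.
+	 */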
+ while (1) {
+ if (ct_sip_get_header(ct, dptr, 0, datalen,
+ SIP_HDR_CONTENT_LENGTH,
+ &matchoff, &matchlen) <= 0)
+ break;
+
+ clen = simple_strtoul(dptr + matchoff, (char **)&end, 10);
+ if (dptr + matchoff == end)
+ break;
+
+ term = false;
+ for (; end + strlen("\r\n\r\n") <= dptr + datalen; end++) {
+ if (end[0] == '\r' && end[1] == '\n' &&
+ end[2] == '\r' && end[3] == '\n') {
+ term = true;
+ break;
+ }
+ }
+ if (!term)
+ break;
+ end += strlen("\r\n\r\n") + clen;
+
+ msglen = origlen = end - dptr;
+ if (msglen > datalen)
+ return NF_ACCEPT;
+
+ ret = process_sip_msg(skb, ct, protoff, dataoff,
+ &dptr, &msglen);
+ /* process_sip_* functions report why this packet is dropped */
+ if (ret != NF_ACCEPT)
+ break;
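+		/* The NAT hooks may have grown or shrunk the message; track
+		 * the accumulated size delta so the TCP sequence numbers can
+		 * be adjusted once all messages have been processed.
+		 */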
+ diff = msglen - origlen;
+ tdiff += diff;
+
+ dataoff += msglen;
+ dptr += msglen;
+ datalen = datalen + diff - msglen;
+ }
+
+ if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+ const struct nf_nat_sip_hooks *hooks;
+
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks)
+ hooks->seq_adjust(skb, protoff, tdiff);
+ }
+
+ return ret;
+}
+
+static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ const char *dptr;
+
+	/* No data? */
+ dataoff = protoff + sizeof(struct udphdr);
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+ nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+ if (unlikely(skb_linearize(skb)))
+ return NF_DROP;
+
+ dptr = skb->data + dataoff;
+ datalen = skb->len - dataoff;
+ if (datalen < strlen("SIP/2.0 200"))
+ return NF_ACCEPT;
+
+ return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen);
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly;
+
+static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
+ [SIP_EXPECT_SIGNALLING] = {
+ .name = "signalling",
+ .max_expected = 1,
+ .timeout = 3 * 60,
+ },
+ [SIP_EXPECT_AUDIO] = {
+ .name = "audio",
+ .max_expected = 2 * IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
+ [SIP_EXPECT_VIDEO] = {
+ .name = "video",
+ .max_expected = 2 * IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
+ [SIP_EXPECT_IMAGE] = {
+ .name = "image",
+ .max_expected = IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
+};
+
+static void __exit nf_conntrack_sip_fini(void)
+{
+ nf_conntrack_helpers_unregister(sip, ports_c * 4);
+}
+
+static int __init nf_conntrack_sip_init(void)
+{
+ int i, ret;
+
+ NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sip_master));
+
+ if (ports_c == 0)
+ ports[ports_c++] = SIP_PORT;
+
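+	/* Four helper slots per port: IPv4 and IPv6, each over UDP and TCP. */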
+ for (i = 0; i < ports_c; i++) {
+ nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP,
+ HELPER_NAME, SIP_PORT, ports[i], i,
+ sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP,
+ HELPER_NAME, SIP_PORT, ports[i], i,
+ sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP,
+ HELPER_NAME, SIP_PORT, ports[i], i,
+ sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP,
+ HELPER_NAME, SIP_PORT, ports[i], i,
+ sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp,
+ NULL, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(sip, ports_c * 4);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
+ }
+ return 0;
+}
+
+module_init(nf_conntrack_sip_init);
+module_exit(nf_conntrack_sip_fini);
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 000000000..daacf2023
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SNMP service broadcast connection tracking helper
+ *
+ * (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
+
+#define SNMP_PORT 161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, 0400);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+ nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
+
+ nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
+ if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+ return nf_nat_snmp(skb, protoff, ct, ctinfo);
+
+ return NF_ACCEPT;
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+ .max_expected = 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+ .name = "snmp",
+ .tuple.src.l3num = NFPROTO_IPV4,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .help = snmp_conntrack_help,
+ .expect_policy = &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+ exp_policy.timeout = timeout;
+ return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+ nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
new file mode 100644
index 000000000..52245dbfa
--- /dev/null
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -0,0 +1,1254 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/netdevice.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#ifdef CONFIG_LWTUNNEL
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+#endif
+#include <linux/rculist_nulls.h>
+
+static bool enable_hooks __read_mostly;
+MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks");
+module_param(enable_hooks, bool, 0000);
+
+unsigned int nf_conntrack_net_id __read_mostly;
+
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+void
+print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
+{
+ switch (tuple->src.l3num) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE:
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(print_tuple);
+
+struct ct_iter_state {
+ struct seq_net_private p;
+ struct hlist_nulls_head *hash;
+ unsigned int htable_size;
+ unsigned int bucket;
+ u_int64_t time_now;
+};
+
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
+{
+ struct ct_iter_state *st = seq->private;
+ struct hlist_nulls_node *n;
+
+ for (st->bucket = 0;
+ st->bucket < st->htable_size;
+ st->bucket++) {
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
+ if (!is_a_nulls(n))
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+ struct hlist_nulls_node *head)
+{
+ struct ct_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
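+	/* If the nulls value does not match the current bucket, the entry
+	 * was moved to another chain by a table resize: rescan the current
+	 * bucket from its head.
+	 */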
+ while (is_a_nulls(head)) {
+ if (likely(get_nulls_value(head) == st->bucket)) {
+ if (++st->bucket >= st->htable_size)
+ return NULL;
+ }
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_nulls_node *head = ct_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ struct ct_iter_state *st = seq->private;
+
+ st->time_now = ktime_get_real_ns();
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&st->hash, &st->htable_size);
+ return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+static void ct_seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ int ret;
+ u32 len;
+ char *secctx;
+
+ ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
+ if (ret)
+ return;
+
+ seq_printf(s, "secctx=%s ", secctx);
+
+ security_release_secctx(secctx, len);
+}
+#else
+static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+
+ if (zone->dir != dir)
+ return;
+ switch (zone->dir) {
+ case NF_CT_DEFAULT_ZONE_DIR:
+ seq_printf(s, "zone=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_ORIG:
+ seq_printf(s, "zone-orig=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_REPL:
+ seq_printf(s, "zone-reply=%u ", zone->id);
+ break;
+ default:
+ break;
+ }
+}
+#else
+static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+ struct ct_iter_state *st = s->private;
+ struct nf_conn_tstamp *tstamp;
+ s64 delta_time;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ delta_time = st->time_now - tstamp->start;
+ if (delta_time > 0)
+ delta_time = div_s64(delta_time, NSEC_PER_SEC);
+ else
+ delta_time = 0;
+
+ seq_printf(s, "delta-time=%llu ",
+ (unsigned long long)delta_time);
+ }
+}
+#else
+static inline void
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+}
+#endif
+
+static const char *l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char *l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ case IPPROTO_ICMPV6: return "icmpv6";
+ }
+
+ return "unknown";
+}
+
+static unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
+{
+ struct nf_conn_acct *acct;
+ struct nf_conn_counter *counter;
+
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
+ return 0;
+
+ counter = acct->counter;
+ seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)atomic64_read(&counter[dir].packets),
+ (unsigned long long)atomic64_read(&counter[dir].bytes));
+
+ return 0;
+}
+
+/* return 0 on success, 1 in case of error */
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_tuple_hash *hash = v;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ const struct nf_conntrack_l4proto *l4proto;
+ struct net *net = seq_file_net(s);
+ int ret = 0;
+
+ WARN_ON(!ct);
+ if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
+ return 0;
+
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
+ if (nf_ct_should_gc(ct)) {
+ nf_ct_kill(ct);
+ goto release;
+ }
+
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ goto release;
+
+ if (!net_eq(nf_ct_net(ct), net))
+ goto release;
+
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+
+ ret = -ENOSPC;
+ seq_printf(s, "%-8s %u %-8s %u ",
+ l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
+
+ if (l4proto->print_conntrack)
+ l4proto->print_conntrack(s, ct);
+
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l4proto);
+
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
+ goto release;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
+ seq_puts(s, "[UNREPLIED] ");
+
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
+
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ goto release;
+
+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[HW_OFFLOAD] ");
+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[OFFLOAD] ");
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ seq_puts(s, "[ASSURED] ");
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ seq_printf(s, "mark=%u ", READ_ONCE(ct->mark));
+#endif
+
+ ct_show_secctx(s, ct);
+ ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
+ ct_show_delta_time(s, ct);
+
+ seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ ret = 0;
+release:
+ nf_ct_put(ct);
+ return ret;
+}
+
+static const struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+ (*pos)++;
+ return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_net(seq);
+ const struct ip_conntrack_stat *st = v;
+ unsigned int nr_conntracks;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ return 0;
+ }
+
+ nr_conntracks = nf_conntrack_count(net);
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ nr_conntracks,
+ st->clash_resolve,
+ st->found,
+ 0,
+ st->invalid,
+ 0,
+ 0,
+ st->chaintoolong,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete,
+ st->search_restart
+ );
+ return 0;
+}
+
+static const struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+static int nf_conntrack_standalone_init_proc(struct net *net)
+{
+ struct proc_dir_entry *pde;
+ kuid_t root_uid;
+ kgid_t root_gid;
+
+ pde = proc_create_net("nf_conntrack", 0440, net->proc_net, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
+ if (!pde)
+ goto out_nf_conntrack;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(pde, root_uid, root_gid);
+
+ pde = proc_create_net("nf_conntrack", 0444, net->proc_net_stat,
+ &ct_cpu_seq_ops, sizeof(struct seq_net_private));
+ if (!pde)
+ goto out_stat_nf_conntrack;
+ return 0;
+
+out_stat_nf_conntrack:
+ remove_proc_entry("nf_conntrack", net->proc_net);
+out_nf_conntrack:
+ return -ENOMEM;
+}
+
+static void nf_conntrack_standalone_fini_proc(struct net *net)
+{
+ remove_proc_entry("nf_conntrack", net->proc_net_stat);
+ remove_proc_entry("nf_conntrack", net->proc_net);
+}
+#else
+static int nf_conntrack_standalone_init_proc(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_standalone_fini_proc(struct net *net)
+{
+}
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
+
+u32 nf_conntrack_count(const struct net *net)
+{
+ const struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ return atomic_read(&cnet->count);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_count);
+
+/* Sysctl support */
+
+#ifdef CONFIG_SYSCTL
+/* the size the user *wants* to set */
+static unsigned int nf_conntrack_htable_size_user __read_mostly;
+
+static int
+nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+	/* the "hashsize" module parameter may have changed the value */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ /* update ret, we might not be able to satisfy request */
+ ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
+
+ /* update it to the actual value used by conntrack */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+ return ret;
+}
+
+static struct ctl_table_header *nf_ct_netfilter_header;
+
+enum nf_ct_sysctl_index {
+ NF_SYSCTL_CT_MAX,
+ NF_SYSCTL_CT_COUNT,
+ NF_SYSCTL_CT_BUCKETS,
+ NF_SYSCTL_CT_CHECKSUM,
+ NF_SYSCTL_CT_LOG_INVALID,
+ NF_SYSCTL_CT_EXPECT_MAX,
+ NF_SYSCTL_CT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ NF_SYSCTL_CT_EVENTS,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ NF_SYSCTL_CT_TIMESTAMP,
+#endif
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+#endif
+ NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+ NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST,
+ NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+#endif
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT,
+ NF_SYSCTL_CT_PROTO_DCCP_LOOSE,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
+#endif
+#ifdef CONFIG_LWTUNNEL
+ NF_SYSCTL_CT_LWTUNNEL,
+#endif
+
+ __NF_SYSCTL_CT_LAST_SYSCTL,
+};
+
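+/* One extra slot for the zeroed sentinel entry that terminates the table. */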
+#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
+
+static struct ctl_table nf_ct_sysctl_table[] = {
+ [NF_SYSCTL_CT_MAX] = {
+ .procname = "nf_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_COUNT] = {
+ .procname = "nf_conntrack_count",
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_BUCKETS] = {
+ .procname = "nf_conntrack_buckets",
+ .data = &nf_conntrack_htable_size_user,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = nf_conntrack_hash_sysctl,
+ },
+ [NF_SYSCTL_CT_CHECKSUM] = {
+ .procname = "nf_conntrack_checksum",
+ .data = &init_net.ct.sysctl_checksum,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ [NF_SYSCTL_CT_LOG_INVALID] = {
+ .procname = "nf_conntrack_log_invalid",
+ .data = &init_net.ct.sysctl_log_invalid,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ [NF_SYSCTL_CT_EXPECT_MAX] = {
+ .procname = "nf_conntrack_expect_max",
+ .data = &nf_ct_expect_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_ACCT] = {
+ .procname = "nf_conntrack_acct",
+ .data = &init_net.ct.sysctl_acct,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ [NF_SYSCTL_CT_EVENTS] = {
+ .procname = "nf_conntrack_events",
+ .data = &init_net.ct.sysctl_events,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ [NF_SYSCTL_CT_TIMESTAMP] = {
+ .procname = "nf_conntrack_timestamp",
+ .data = &init_net.ct.sysctl_tstamp,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
+ .procname = "nf_conntrack_generic_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = {
+ .procname = "nf_conntrack_tcp_timeout_syn_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = {
+ .procname = "nf_conntrack_tcp_timeout_syn_recv",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = {
+ .procname = "nf_conntrack_tcp_timeout_established",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_fin_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_close_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = {
+ .procname = "nf_conntrack_tcp_timeout_last_ack",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_time_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = {
+ .procname = "nf_conntrack_tcp_timeout_close",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = {
+ .procname = "nf_conntrack_tcp_timeout_max_retrans",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = {
+ .procname = "nf_conntrack_tcp_timeout_unacknowledged",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
+ .procname = "nf_flowtable_tcp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+ [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+ .procname = "nf_conntrack_tcp_loose",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
+ .procname = "nf_conntrack_tcp_be_liberal",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = {
+ .procname = "nf_conntrack_tcp_ignore_invalid_rst",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
+ .procname = "nf_conntrack_tcp_max_retrans",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
+ .procname = "nf_conntrack_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = {
+ .procname = "nf_conntrack_udp_timeout_stream",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
+ .procname = "nf_flowtable_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = {
+ .procname = "nf_conntrack_icmpv6_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = {
+ .procname = "nf_conntrack_sctp_timeout_closed",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = {
+ .procname = "nf_conntrack_sctp_timeout_cookie_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = {
+ .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = {
+ .procname = "nf_conntrack_sctp_timeout_established",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
+ .procname = "nf_conntrack_dccp_timeout_request",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = {
+ .procname = "nf_conntrack_dccp_timeout_respond",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = {
+ .procname = "nf_conntrack_dccp_timeout_partopen",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = {
+ .procname = "nf_conntrack_dccp_timeout_open",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = {
+ .procname = "nf_conntrack_dccp_timeout_closereq",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = {
+ .procname = "nf_conntrack_dccp_timeout_closing",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = {
+ .procname = "nf_conntrack_dccp_timeout_timewait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
+ .procname = "nf_conntrack_dccp_loose",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = {
+ .procname = "nf_conntrack_gre_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = {
+ .procname = "nf_conntrack_gre_timeout_stream",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+#ifdef CONFIG_LWTUNNEL
+ [NF_SYSCTL_CT_LWTUNNEL] = {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+#endif
+ {}
+};
+
+static struct ctl_table nf_ct_netfilter_table[] = {
+ {
+ .procname = "nf_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+#define XASSIGN(XNAME, tn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \
+ &(tn)->timeouts[TCP_CONNTRACK_ ## XNAME]
+
+ XASSIGN(SYN_SENT, tn);
+ XASSIGN(SYN_RECV, tn);
+ XASSIGN(ESTABLISHED, tn);
+ XASSIGN(FIN_WAIT, tn);
+ XASSIGN(CLOSE_WAIT, tn);
+ XASSIGN(LAST_ACK, tn);
+ XASSIGN(TIME_WAIT, tn);
+ XASSIGN(CLOSE, tn);
+ XASSIGN(RETRANS, tn);
+ XASSIGN(UNACK, tn);
+#undef XASSIGN
+#define XASSIGN(XNAME, rval) \
+ table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval)
+
+ XASSIGN(LOOSE, &tn->tcp_loose);
+ XASSIGN(LIBERAL, &tn->tcp_be_liberal);
+ XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst);
+#undef XASSIGN
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+#endif
+}
+
+static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
+
+#define XASSIGN(XNAME, sn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \
+ &(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME]
+
+ XASSIGN(CLOSED, sn);
+ XASSIGN(COOKIE_WAIT, sn);
+ XASSIGN(COOKIE_ECHOED, sn);
+ XASSIGN(ESTABLISHED, sn);
+ XASSIGN(SHUTDOWN_SENT, sn);
+ XASSIGN(SHUTDOWN_RECD, sn);
+ XASSIGN(SHUTDOWN_ACK_SENT, sn);
+ XASSIGN(HEARTBEAT_SENT, sn);
+#undef XASSIGN
+#endif
+}
+
+static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
+
+#define XASSIGN(XNAME, dn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \
+ &(dn)->dccp_timeout[CT_DCCP_ ## XNAME]
+
+ XASSIGN(REQUEST, dn);
+ XASSIGN(RESPOND, dn);
+ XASSIGN(PARTOPEN, dn);
+ XASSIGN(OPEN, dn);
+ XASSIGN(CLOSEREQ, dn);
+ XASSIGN(CLOSING, dn);
+ XASSIGN(TIMEWAIT, dn);
+#undef XASSIGN
+
+ table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose;
+#endif
+}
+
+static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ struct nf_gre_net *gn = nf_gre_pernet(net);
+
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED];
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED];
+#endif
+}
+
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ struct nf_udp_net *un = nf_udp_pernet(net);
+ struct ctl_table *table;
+
+ BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL);
+
+ table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table[NF_SYSCTL_CT_COUNT].data = &cnet->count;
+ table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
+ table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
+ table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp;
+#endif
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+#endif
+
+ nf_conntrack_standalone_init_tcp_sysctl(net, table);
+ nf_conntrack_standalone_init_sctp_sysctl(net, table);
+ nf_conntrack_standalone_init_dccp_sysctl(net, table);
+ nf_conntrack_standalone_init_gre_sysctl(net, table);
+
+ /* Don't allow non-init_net ns to alter global sysctls */
+ if (!net_eq(&init_net, net)) {
+ table[NF_SYSCTL_CT_MAX].mode = 0444;
+ table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
+ table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
+ }
+
+ cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
+ if (!cnet->sysctl_header)
+ goto out_unregister_netfilter;
+
+ return 0;
+
+out_unregister_netfilter:
+ kfree(table);
+ return -ENOMEM;
+}
+
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
+{
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ struct ctl_table *table;
+
+ table = cnet->sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(cnet->sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+static void nf_conntrack_fini_net(struct net *net)
+{
+ if (enable_hooks)
+ nf_ct_netns_put(net, NFPROTO_INET);
+
+ nf_conntrack_standalone_fini_proc(net);
+ nf_conntrack_standalone_fini_sysctl(net);
+}
+
+static int nf_conntrack_pernet_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_checksum = 1;
+
+ ret = nf_conntrack_standalone_init_sysctl(net);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_conntrack_standalone_init_proc(net);
+ if (ret < 0)
+ goto out_proc;
+
+ ret = nf_conntrack_init_net(net);
+ if (ret < 0)
+ goto out_init_net;
+
+ if (enable_hooks) {
+ ret = nf_ct_netns_get(net, NFPROTO_INET);
+ if (ret < 0)
+ goto out_hooks;
+ }
+
+ return 0;
+
+out_hooks:
+ nf_conntrack_cleanup_net(net);
+out_init_net:
+ nf_conntrack_standalone_fini_proc(net);
+out_proc:
+ nf_conntrack_standalone_fini_sysctl(net);
+ return ret;
+}
+
+static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
+{
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ nf_conntrack_fini_net(net);
+
+ nf_conntrack_cleanup_net_list(net_exit_list);
+}
+
+static struct pernet_operations nf_conntrack_net_ops = {
+ .init = nf_conntrack_pernet_init,
+ .exit_batch = nf_conntrack_pernet_exit,
+ .id = &nf_conntrack_net_id,
+ .size = sizeof(struct nf_conntrack_net),
+};
+
+static int __init nf_conntrack_standalone_init(void)
+{
+	int ret = nf_conntrack_init_start();
+
+	if (ret < 0)
+		goto out_start;
+
+ BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
+
+#ifdef CONFIG_SYSCTL
+ nf_ct_netfilter_header =
+ register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header) {
+ pr_err("nf_conntrack: can't register to sysctl.\n");
+ ret = -ENOMEM;
+ goto out_sysctl;
+ }
+
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+#endif
+
+ nf_conntrack_init_end();
+
+ ret = register_pernet_subsys(&nf_conntrack_net_ops);
+ if (ret < 0)
+ goto out_pernet;
+
+ return 0;
+
+out_pernet:
+#ifdef CONFIG_SYSCTL
+ unregister_net_sysctl_table(nf_ct_netfilter_header);
+out_sysctl:
+#endif
+ nf_conntrack_cleanup_end();
+out_start:
+ return ret;
+}
+
+static void __exit nf_conntrack_standalone_fini(void)
+{
+ nf_conntrack_cleanup_start();
+ unregister_pernet_subsys(&nf_conntrack_net_ops);
+#ifdef CONFIG_SYSCTL
+ unregister_net_sysctl_table(nf_ct_netfilter_header);
+#endif
+ nf_conntrack_cleanup_end();
+}
+
+module_init(nf_conntrack_standalone_init);
+module_exit(nf_conntrack_standalone_fini);
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
new file mode 100644
index 000000000..80ee53f29
--- /dev/null
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+#define HELPER_NAME "tftp"
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_tftp");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
+
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
+
+static int tftp_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ const struct tftphdr *tfh;
+ struct tftphdr _tftph;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int ret = NF_ACCEPT;
+ typeof(nf_nat_tftp_hook) nf_nat_tftp;
+
+ tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ if (tfh == NULL)
+ return NF_ACCEPT;
+
+ switch (ntohs(tfh->opcode)) {
+ case TFTP_OPCODE_READ:
+ case TFTP_OPCODE_WRITE:
+		/* RRQ and WRQ work the same way */
+ nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ nf_ct_helper_log(skb, ct, "cannot alloc expectation");
+ return NF_DROP;
+ }
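+		/* The server replies from an ephemeral port, so wildcard the
+		 * expected source port and match only the client's chosen
+		 * destination port.
+		 */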
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_UDP, NULL, &tuple->dst.u.udp.port);
+
+ pr_debug("expect: ");
+ nf_ct_dump_tuple(&exp->tuple);
+
+ nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
+ if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_tftp(skb, ctinfo, exp);
+ else if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, ct, "cannot add expectation");
+ ret = NF_DROP;
+ }
+ nf_ct_expect_put(exp);
+ break;
+ case TFTP_OPCODE_DATA:
+ case TFTP_OPCODE_ACK:
+ pr_debug("Data/ACK opcode\n");
+ break;
+ case TFTP_OPCODE_ERROR:
+ pr_debug("Error opcode\n");
+ break;
+ default:
+ pr_debug("Unknown opcode\n");
+ }
+ return ret;
+}
+
+static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly;
+
+static const struct nf_conntrack_expect_policy tftp_exp_policy = {
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+static void __exit nf_conntrack_tftp_fini(void)
+{
+ nf_conntrack_helpers_unregister(tftp, ports_c * 2);
+}
+
+static int __init nf_conntrack_tftp_init(void)
+{
+ int i, ret;
+
+ NF_CT_HELPER_BUILD_BUG_ON(0);
+
+ if (ports_c == 0)
+ ports[ports_c++] = TFTP_PORT;
+
+ for (i = 0; i < ports_c; i++) {
+ nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP,
+ HELPER_NAME, TFTP_PORT, ports[i], i,
+ &tftp_exp_policy, 0, tftp_help, NULL,
+ THIS_MODULE);
+ nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP,
+ HELPER_NAME, TFTP_PORT, ports[i], i,
+ &tftp_exp_policy, 0, tftp_help, NULL,
+ THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(tftp, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
+ }
+ return 0;
+}
+
+module_init(nf_conntrack_tftp_init);
+module_exit(nf_conntrack_tftp_fini);
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
new file mode 100644
index 000000000..0cc584d3d
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+
+const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_timeout_hook);
+
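+/* Detach a timeout policy from a conntrack entry; a NULL timeout argument
+ * detaches whatever policy is currently attached.
+ */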
+static int untimeout(struct nf_conn *ct, void *timeout)
+{
+ struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+ if (timeout_ext) {
+ const struct nf_ct_timeout *t;
+
+ t = rcu_access_pointer(timeout_ext->timeout);
+
+ if (!timeout || t == timeout)
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ }
+
+	/* We do not intend to delete this conntrack. */
+ return 0;
+}
+
+void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout)
+{
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = timeout,
+ };
+
+ nf_ct_iterate_cleanup_net(untimeout, &iter_data);
+}
+EXPORT_SYMBOL_GPL(nf_ct_untimeout);
+
+static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout)
+{
+ const struct nf_ct_timeout_hooks *h = rcu_dereference(nf_ct_timeout_hook);
+
+ if (h)
+ h->timeout_put(timeout);
+}
+
+int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
+ u8 l3num, u8 l4num, const char *timeout_name)
+{
+ const struct nf_ct_timeout_hooks *h;
+ struct nf_ct_timeout *timeout;
+ struct nf_conn_timeout *timeout_ext;
+ const char *errmsg = NULL;
+ int ret = 0;
+
+ rcu_read_lock();
+ h = rcu_dereference(nf_ct_timeout_hook);
+ if (!h) {
+ ret = -ENOENT;
+ errmsg = "Timeout policy base is empty";
+ goto out;
+ }
+
+ timeout = h->timeout_find_get(net, timeout_name);
+ if (!timeout) {
+ ret = -ENOENT;
+ pr_info_ratelimited("No such timeout policy \"%s\"\n",
+ timeout_name);
+ goto out;
+ }
+
+ if (timeout->l3num != l3num) {
+ ret = -EINVAL;
+ pr_info_ratelimited("Timeout policy `%s' can only be used by "
+ "L%d protocol number %d\n",
+ timeout_name, 3, timeout->l3num);
+ goto err_put_timeout;
+ }
+ /* Make sure the timeout policy matches any existing protocol tracker,
+ * otherwise default to generic.
+ */
+ if (timeout->l4proto->l4proto != l4num) {
+ ret = -EINVAL;
+ pr_info_ratelimited("Timeout policy `%s' can only be used by "
+ "L%d protocol number %d\n",
+ timeout_name, 4, timeout->l4proto->l4proto);
+ goto err_put_timeout;
+ }
+ timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
+ if (!timeout_ext) {
+ ret = -ENOMEM;
+ goto err_put_timeout;
+ }
+
+ rcu_read_unlock();
+ return ret;
+
+err_put_timeout:
+ __nf_ct_timeout_put(timeout);
+out:
+ rcu_read_unlock();
+ if (errmsg)
+ pr_info_ratelimited("%s\n", errmsg);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_set_timeout);
+
+void nf_ct_destroy_timeout(struct nf_conn *ct)
+{
+ struct nf_conn_timeout *timeout_ext;
+ const struct nf_ct_timeout_hooks *h;
+
+ rcu_read_lock();
+ h = rcu_dereference(nf_ct_timeout_hook);
+
+ if (h) {
+ timeout_ext = nf_ct_timeout_find(ct);
+ if (timeout_ext) {
+ struct nf_ct_timeout *t;
+
+ t = rcu_dereference(timeout_ext->timeout);
+ if (t)
+ h->timeout_put(t);
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ }
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout);
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 000000000..9e43a0a59
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static bool nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+void nf_conntrack_tstamp_pernet_init(struct net *net)
+{
+ net->ct.sysctl_tstamp = nf_ct_tstamp;
+}
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
new file mode 100644
index 000000000..a8e2425e4
--- /dev/null
+++ b/net/netfilter/nf_dup_netdev.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+#define NF_RECURSION_LIMIT 2
+
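+/* Per-CPU recursion guard: a duplicated packet that re-enters a dup/fwd
+ * expression is dropped once the limit is exceeded, breaking forwarding
+ * loops.
+ */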
+static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
+ enum nf_dev_hooks hook)
+{
+ if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ goto err;
+
+ if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
+ if (skb_cow_head(skb, skb->mac_len))
+ goto err;
+
+ skb_push(skb, skb->mac_len);
+ }
+
+ skb->dev = dev;
+ skb_clear_tstamp(skb);
+ __this_cpu_inc(nf_dup_skb_recursion);
+ dev_queue_xmit(skb);
+ __this_cpu_dec(nf_dup_skb_recursion);
+ return;
+err:
+ kfree_skb(skb);
+}
+
+void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+ if (!dev) {
+ kfree_skb(pkt->skb);
+ return;
+ }
+
+ nf_do_netdev_egress(pkt->skb, dev, nft_hook(pkt));
+}
+EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
+
+void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+ struct net_device *dev;
+ struct sk_buff *skb;
+
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+ if (dev == NULL)
+ return;
+
+ skb = skb_clone(pkt->skb, GFP_ATOMIC);
+ if (skb)
+ nf_do_netdev_egress(skb, dev, nft_hook(pkt));
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
+
+int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ enum flow_action_id id, int oif)
+{
+ struct flow_action_entry *entry;
+ struct net_device *dev;
+
+ /* nft_flow_rule_destroy() releases the reference on this device. */
+ dev = dev_get_by_index(ctx->net, oif);
+ if (!dev)
+ return -EOPNOTSUPP;
+
+ entry = &flow->rule->action.entries[ctx->num_actions++];
+ entry->id = id;
+ entry->dev = dev;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter packet duplication support");
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
new file mode 100644
index 000000000..c1d99cb37
--- /dev/null
+++ b/net/netfilter/nf_flow_table_core.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static DEFINE_MUTEX(flowtable_lock);
+static LIST_HEAD(flowtables);
+
+static void
+flow_offload_fill_dir(struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
+
+ ft->dir = dir;
+
+ switch (ctt->src.l3num) {
+ case NFPROTO_IPV4:
+ ft->src_v4 = ctt->src.u3.in;
+ ft->dst_v4 = ctt->dst.u3.in;
+ break;
+ case NFPROTO_IPV6:
+ ft->src_v6 = ctt->src.u3.in6;
+ ft->dst_v6 = ctt->dst.u3.in6;
+ break;
+ }
+
+ ft->l3proto = ctt->src.l3num;
+ ft->l4proto = ctt->dst.protonum;
+
+ switch (ctt->dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ft->src_port = ctt->src.u.tcp.port;
+ ft->dst_port = ctt->dst.u.tcp.port;
+ break;
+ }
+}
+
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
+{
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct)))
+ return NULL;
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
+ return NULL;
+
+ refcount_inc(&ct->ct_general.use);
+ flow->ct = ct;
+
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
+
+ if (ct->status & IPS_SRC_NAT)
+ __set_bit(NF_FLOW_SNAT, &flow->flags);
+ if (ct->status & IPS_DST_NAT)
+ __set_bit(NF_FLOW_DNAT, &flow->flags);
+
+ return flow;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+{
+ const struct rt6_info *rt;
+
+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
+ return rt6_get_cookie(rt);
+ }
+
+ return 0;
+}
+
+static int flow_offload_fill_route(struct flow_offload *flow,
+ const struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ struct dst_entry *dst = route->tuple[dir].dst;
+ int i, j = 0;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ break;
+ case NFPROTO_IPV6:
+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
+ break;
+ }
+
+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
+ flow_tuple->in_vlan_ingress |= BIT(j);
+ j++;
+ }
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+
+ switch (route->tuple[dir].xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+ ETH_ALEN);
+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+ ETH_ALEN);
+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ break;
+ case FLOW_OFFLOAD_XMIT_XFRM:
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ if (!dst_hold_safe(route->tuple[dir].dst))
+ return -1;
+
+ flow_tuple->dst_cache = dst;
+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
+
+ return 0;
+}
+
+static void nft_flow_dst_release(struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir)
+{
+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
+int flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route)
+{
+ int err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (err < 0)
+ return err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
+ if (err < 0)
+ goto err_route_reply;
+
+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
+
+ return 0;
+
+err_route_reply:
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(flow_offload_route_init);
+
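+/*
+ * When a flow leaves the fast path, reset the cached TCP window state
+ * so tracking restarts, and rearm the conntrack timeout: the regular
+ * per-state TCP or UDP "replied" timeout minus the offload extension,
+ * clamped to zero, and only if that is shorter than what currently
+ * remains on the conntrack entry.
+ */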
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+{
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
+}
+
+static void flow_offload_fixup_ct(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ int l4num = nf_ct_protonum(ct);
+ s32 timeout;
+
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+
+ timeout = tn->timeouts[ct->proto.tcp.state];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
+ } else {
+ return;
+ }
+
+ if (timeout < 0)
+ timeout = 0;
+
+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+}
+
+static void flow_offload_route_release(struct flow_offload *flow)
+{
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
+}
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ switch (flow->type) {
+ case NF_FLOW_OFFLOAD_ROUTE:
+ flow_offload_route_release(flow);
+ break;
+ default:
+ break;
+ }
+ nf_ct_put(flow->ct);
+ kfree_rcu(flow, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
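+/*
+ * rhashtable callbacks: both direction tuples of a flow are inserted
+ * as separate nodes; hashing and comparison cover the tuple fields up
+ * to the __hash marker only.
+ */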
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
+ return 1;
+
+ return 0;
+}
+
+static const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+{
+ unsigned long timeout = NF_FLOW_TIMEOUT;
+ struct net *net = nf_ct_net(flow->ct);
+ int l4num = nf_ct_protonum(flow->ct);
+
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+ timeout = tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->offload_timeout;
+ }
+
+ return timeout;
+}
+
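+/*
+ * Insert both direction tuples into the flowtable, unwinding the first
+ * insertion if the second one fails. On success the flow timeout is
+ * armed, the conntrack timeout is extended for the offload period and,
+ * if the flowtable supports it, a hardware offload request is queued.
+ */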
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+ int err;
+
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+
+ err = rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[0].node,
+ nf_flow_offload_rhash_params);
+ if (err < 0)
+ return err;
+
+ err = rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[1].node,
+ nf_flow_offload_rhash_params);
+ if (err < 0) {
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[0].node,
+ nf_flow_offload_rhash_params);
+ return err;
+ }
+
+ nf_ct_offload_timeout(flow->ct);
+
+ if (nf_flowtable_hw_offload(flow_table)) {
+ __set_bit(NF_FLOW_HW, &flow->flags);
+ nf_flow_offload_add(flow_table, flow);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+void flow_offload_refresh(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, bool force)
+{
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (force || timeout - READ_ONCE(flow->timeout) > HZ)
+ WRITE_ONCE(flow->timeout, timeout);
+ else
+ return;
+
+ if (likely(!nf_flowtable_hw_offload(flow_table)))
+ return;
+
+ nf_flow_offload_add(flow_table, flow);
+}
+EXPORT_SYMBOL_GPL(flow_offload_refresh);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return nf_flow_timeout_delta(flow->timeout) <= 0;
+}
+
+static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ nf_flow_offload_rhash_params);
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ nf_flow_offload_rhash_params);
+ flow_offload_free(flow);
+}
+
+void flow_offload_teardown(struct flow_offload *flow)
+{
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+ flow_offload_fixup_ct(flow->ct);
+}
+EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
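+/*
+ * Look up a tuple in the flowtable. Flows already in teardown or whose
+ * conntrack entry is dying are treated as a miss so the packet falls
+ * back to the classic forwarding path.
+ */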
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload *flow;
+ int dir;
+
+ tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+ nf_flow_offload_rhash_params);
+ if (!tuplehash)
+ return NULL;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
+ return NULL;
+
+ if (unlikely(nf_ct_is_dying(flow->ct)))
+ return NULL;
+
+ return tuplehash;
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+static int
+nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct nf_flowtable *flowtable,
+ struct flow_offload *flow, void *data),
+ void *data)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err = 0;
+
+ rhashtable_walk_enter(&flow_table->rhashtable, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ if (PTR_ERR(tuplehash) != -EAGAIN) {
+ err = PTR_ERR(tuplehash);
+ break;
+ }
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ iter(flow_table, flow, data);
+ }
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return err;
+}
+
+static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
+ const struct flow_offload *flow)
+{
+ return flow_table->type->gc && flow_table->type->gc(flow);
+}
+
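+/*
+ * Per-entry GC step: expired, dying or custom-GC'ed flows are torn
+ * down. Hardware-offloaded flows are removed in two phases, a driver
+ * deletion request first (NF_FLOW_HW_DYING) and the final table
+ * removal once the driver confirms it (NF_FLOW_HW_DEAD); software-only
+ * flows are freed directly. Live hardware flows get a stats refresh
+ * instead.
+ */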
+static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, void *data)
+{
+ if (nf_flow_has_expired(flow) ||
+ nf_ct_is_dying(flow->ct) ||
+ nf_flow_custom_gc(flow_table, flow))
+ flow_offload_teardown(flow);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
+ nf_flow_offload_del(flow_table, flow);
+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
+ flow_offload_del(flow_table, flow);
+ } else {
+ flow_offload_del(flow_table, flow);
+ }
+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
+ nf_flow_offload_stats(flow_table, flow);
+ }
+}
+
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+}
+
+static void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct nf_flowtable *flow_table;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+ nf_flow_table_gc_run(flow_table);
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct tcphdr *tcph;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
+}
+
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct udphdr *udph;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+ new_port, false);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+}
+
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
+ break;
+ case IPPROTO_UDP:
+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
+ break;
+ }
+}
+
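+/*
+ * Source NAT on the ports: in the original direction the source port
+ * is rewritten to the reply tuple's destination port; in the reply
+ * direction the destination port is mapped back. The L4 checksum is
+ * updated to match.
+ */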
+void nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ }
+
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
+ unsigned int thoff, u8 protocol,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ }
+
+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
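+/*
+ * Initialize a flowtable instance: set up the rhashtable and the flow
+ * block, start the periodic (1 HZ) garbage collection work and link
+ * the table into the global flowtables list.
+ */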
+int nf_flow_table_init(struct nf_flowtable *flowtable)
+{
+ int err;
+
+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+ flow_block_init(&flowtable->flow_block);
+ init_rwsem(&flowtable->flow_block_lock);
+
+ err = rhashtable_init(&flowtable->rhashtable,
+ &nf_flow_offload_rhash_params);
+ if (err < 0)
+ return err;
+
+ queue_delayed_work(system_power_efficient_wq,
+ &flowtable->gc_work, HZ);
+
+ mutex_lock(&flowtable_lock);
+ list_add(&flowtable->list, &flowtables);
+ mutex_unlock(&flowtable_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_init);
+
+static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (!dev) {
+ flow_offload_teardown(flow);
+ return;
+ }
+
+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
+ (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+ flow->tuplehash[1].tuple.iifidx == dev->ifindex))
+ flow_offload_teardown(flow);
+}
+
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+ struct net_device *dev)
+{
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+ flush_delayed_work(&flowtable->gc_work);
+ nf_flow_table_offload_flush(flowtable);
+}
+
+void nf_flow_table_cleanup(struct net_device *dev)
+{
+ struct nf_flowtable *flowtable;
+
+ mutex_lock(&flowtable_lock);
+ list_for_each_entry(flowtable, &flowtables, list)
+ nf_flow_table_gc_cleanup(flowtable, dev);
+ mutex_unlock(&flowtable_lock);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+ mutex_lock(&flowtable_lock);
+ list_del(&flow_table->list);
+ mutex_unlock(&flowtable_lock);
+
+ cancel_delayed_work_sync(&flow_table->gc_work);
+ nf_flow_table_offload_flush(flow_table);
+ /* ... no more pending work after this stage ... */
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ nf_flow_table_gc_run(flow_table);
+ nf_flow_table_offload_flush_cleanup(flow_table);
+ rhashtable_destroy(&flow_table->rhashtable);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
+static int nf_flow_table_init_net(struct net *net)
+{
+ net->ft.stat = alloc_percpu(struct nf_flow_table_stat);
+ return net->ft.stat ? 0 : -ENOMEM;
+}
+
+static void nf_flow_table_fini_net(struct net *net)
+{
+ free_percpu(net->ft.stat);
+}
+
+static int nf_flow_table_pernet_init(struct net *net)
+{
+ int ret;
+
+ ret = nf_flow_table_init_net(net);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_flow_table_init_proc(net);
+ if (ret < 0)
+ goto out_proc;
+
+ return 0;
+
+out_proc:
+ nf_flow_table_fini_net(net);
+ return ret;
+}
+
+static void nf_flow_table_pernet_exit(struct list_head *net_exit_list)
+{
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_flow_table_fini_proc(net);
+ nf_flow_table_fini_net(net);
+ }
+}
+
+static struct pernet_operations nf_flow_table_net_ops = {
+ .init = nf_flow_table_pernet_init,
+ .exit_batch = nf_flow_table_pernet_exit,
+};
+
+static int __init nf_flow_table_module_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_flow_table_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_flow_table_offload_init();
+ if (ret)
+ goto out_offload;
+
+ return 0;
+
+out_offload:
+ unregister_pernet_subsys(&nf_flow_table_net_ops);
+ return ret;
+}
+
+static void __exit nf_flow_table_module_exit(void)
+{
+ nf_flow_table_offload_exit();
+ unregister_pernet_subsys(&nf_flow_table_net_ops);
+}
+
+module_init(nf_flow_table_module_init);
+module_exit(nf_flow_table_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter flow table module");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
new file mode 100644
index 000000000..9505f9d18
--- /dev/null
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/if_vlan.h>
+
+static unsigned int
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static int nf_flow_rule_route_inet(struct net *net,
+ struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ int err;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
+ break;
+ default:
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_inet,
+ .free = nf_flow_table_free,
+ .hook = nf_flow_offload_inet_hook,
+ .owner = THIS_MODULE,
+};
+
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_ipv4,
+ .free = nf_flow_table_free,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_ipv6,
+ .free = nf_flow_table_free,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_inet_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv4);
+ nft_register_flowtable_type(&flowtable_ipv6);
+ nft_register_flowtable_type(&flowtable_inet);
+
+ return 0;
+}
+
+static void __exit nf_flow_inet_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_inet);
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+ nft_unregister_flowtable_type(&flowtable_ipv4);
+}
+
+module_init(nf_flow_inet_module_init);
+module_exit(nf_flow_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
+MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module");
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
new file mode 100644
index 000000000..6feaac9ab
--- /dev/null
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -0,0 +1,689 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
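+/*
+ * TCP packets carrying FIN or RST must leave the fast path so that
+ * conntrack can track connection shutdown: tear the flow down and let
+ * the packet continue through the classic path.
+ */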
+static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ struct sk_buff *skb, unsigned int thoff)
+{
+ struct tcphdr *tcph;
+
+ if (proto != IPPROTO_TCP)
+ return 0;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ if (unlikely(tcph->fin || tcph->rst)) {
+ flow_offload_teardown(flow);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+}
+
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+}
+
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
+ break;
+ case IPPROTO_UDP:
+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
+ break;
+ }
+}
+
+static void nf_flow_snat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
+ struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ unsigned int thoff, enum flow_offload_tuple_dir dir,
+ struct iphdr *iph)
+{
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
+ }
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
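+/*
+ * Record up to two levels of encapsulation in the lookup tuple: a
+ * hardware-accelerated VLAN tag first, then an in-payload 802.1Q tag
+ * or PPPoE session header taken from the packet itself.
+ */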
+static void nf_flow_tuple_encap(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ struct vlan_ethhdr *veth;
+ struct pppoe_hdr *phdr;
+ int i = 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
+ tuple->encap[i].proto = skb->vlan_proto;
+ i++;
+ }
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+ tuple->encap[i].proto = skb->protocol;
+ break;
+ case htons(ETH_P_PPP_SES):
+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ tuple->encap[i].id = ntohs(phdr->sid);
+ tuple->encap[i].proto = skb->protocol;
+ break;
+ }
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+ u8 ipproto;
+
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ return -1;
+
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4);
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ thoff += offset;
+
+ ipproto = iph->protocol;
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ *hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ *hdrsize = sizeof(struct udphdr);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ *hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
+ return -1;
+ }
+
+ if (iph->ttl <= 1)
+ return -1;
+
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
+ return -1;
+
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return -1;
+ break;
+ }
+ }
+
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = ipproto;
+ tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+ return true;
+
+ return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
+static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct dst_entry *dst)
+{
+ skb_orphan(skb);
+ skb_dst_set_noref(skb, dst);
+ dst_output(state->net, state->sk, skb);
+ return NF_STOLEN;
+}
+
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+{
+ struct vlan_ethhdr *veth;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ if (veth->h_vlan_encapsulated_proto == proto) {
+ *offset += VLAN_HLEN;
+ return true;
+ }
+ break;
+ case htons(ETH_P_PPP_SES):
+ if (nf_flow_pppoe_proto(skb) == proto) {
+ *offset += PPPOE_SES_HLEN;
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+ struct flow_offload_tuple_rhash *tuplehash)
+{
+ struct vlan_hdr *vlan_hdr;
+ int i;
+
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+ if (skb_vlan_tag_present(skb)) {
+ __vlan_hwaccel_clear_tag(skb);
+ continue;
+ }
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ vlan_hdr = (struct vlan_hdr *)skb->data;
+ __skb_pull(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vlan_hdr);
+ skb_reset_network_header(skb);
+ break;
+ case htons(ETH_P_PPP_SES):
+ skb->protocol = nf_flow_pppoe_proto(skb);
+ skb_pull(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb);
+ break;
+ }
+ }
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ const struct flow_offload_tuple_rhash *tuplehash,
+ unsigned short type)
+{
+ struct net_device *outdev;
+
+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+ if (!outdev)
+ return NF_DROP;
+
+ skb->dev = outdev;
+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+ tuplehash->tuple.out.h_source, skb->len);
+ dev_queue_xmit(skb);
+
+ return NF_STOLEN;
+}
+
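+/*
+ * IPv4 fast path: parse the tuple, look it up in the flowtable, verify
+ * MTU, TCP state and route validity, then pop any encapsulation, apply
+ * NAT, decrement the TTL and transmit directly via the cached
+ * neighbour, a direct xmit or the xfrm output path, bypassing the
+ * regular forwarding stack.
+ */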
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ u32 hdrsize, offset = 0;
+ unsigned int thoff, mtu;
+ struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+ int ret;
+
+ if (skb->protocol != htons(ETH_P_IP) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ return NF_ACCEPT;
+
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4) + offset;
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+ return NF_ACCEPT;
+
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
+ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+ flow_offload_refresh(flow_table, flow, false);
+
+ nf_flow_encap_pop(skb, tuplehash);
+ thoff -= offset;
+
+ iph = ip_hdr(skb);
+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
+
+ ip_decrease_ttl(iph);
+ skb_clear_tstamp(skb);
+
+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+ IPCB(skb)->iif = skb->dev->ifindex;
+ IPCB(skb)->flags = IPSKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr,
+ struct ipv6hdr *ip6h)
+{
+ struct tcphdr *tcph;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+}
+
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+}
+
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
+ break;
+ case IPPROTO_UDP:
+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
+ break;
+ }
+}
+
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ }
+
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ }
+
+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir,
+ struct ipv6hdr *ip6h)
+{
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
+ }
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+ u8 nexthdr;
+
+ thoff = sizeof(*ip6h) + offset;
+ if (!pskb_may_pull(skb, thoff))
+ return -1;
+
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+
+ nexthdr = ip6h->nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ *hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ *hdrsize = sizeof(struct udphdr);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ *hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
+ return -1;
+ }
+
+ if (ip6h->hop_limit <= 1)
+ return -1;
+
+ if (!pskb_may_pull(skb, thoff + *hdrsize))
+ return -1;
+
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return -1;
+ break;
+ }
+ }
+
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = nexthdr;
+ tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);
+
+ return 0;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ const struct in6_addr *nexthop;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ unsigned int thoff, mtu;
+ u32 hdrsize, offset = 0;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+ int ret;
+
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ return NF_ACCEPT;
+
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ thoff = sizeof(*ip6h) + offset;
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+ return NF_ACCEPT;
+
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
+ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+ flow_offload_refresh(flow_table, flow, false);
+
+ nf_flow_encap_pop(skb, tuplehash);
+
+ ip6h = ipv6_hdr(skb);
+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
+
+ ip6h->hop_limit--;
+ skb_clear_tstamp(skb);
+
+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+
+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ IP6CB(skb)->iif = skb->dev->ifindex;
+ IP6CB(skb)->flags = IP6SKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
new file mode 100644
index 000000000..1c26f03fc
--- /dev/null
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -0,0 +1,1241 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <linux/tc_act/tc_csum.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static struct workqueue_struct *nf_flow_offload_add_wq;
+static struct workqueue_struct *nf_flow_offload_del_wq;
+static struct workqueue_struct *nf_flow_offload_stats_wq;
+
+struct flow_offload_work {
+ struct list_head list;
+ enum flow_cls_command cmd;
+ struct nf_flowtable *flowtable;
+ struct flow_offload *flow;
+ struct work_struct work;
+};
+
+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
+ (__match)->dissector.offset[__type] = \
+ offsetof(struct nf_flow_key, __field)
+
+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+ unsigned int enc_keys;
+
+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ return;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
+ mask->enc_key_id.keyid = 0xffffffff;
+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+
+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ enc_ipv4);
+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
+ if (key->enc_ipv4.src)
+ mask->enc_ipv4.src = 0xffffffff;
+ if (key->enc_ipv4.dst)
+ mask->enc_ipv4.dst = 0xffffffff;
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ } else {
+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
+ sizeof(struct in6_addr));
+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
+ sizeof(struct in6_addr)))
+ memset(&mask->enc_ipv6.src, 0xff,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
+ sizeof(struct in6_addr)))
+ memset(&mask->enc_ipv6.dst, 0xff,
+ sizeof(struct in6_addr));
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+ match->dissector.used_keys |= enc_keys;
+}
+
+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
+ struct flow_dissector_key_vlan *mask,
+ u16 vlan_id, __be16 proto)
+{
+ key->vlan_id = vlan_id;
+ mask->vlan_id = VLAN_VID_MASK;
+ key->vlan_tpid = proto;
+ mask->vlan_tpid = 0xffff;
+}
+
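+/*
+ * Build the flow_cls match for one tuple: metadata (ingress ifindex),
+ * up to two VLAN tags, L3 addresses, L4 protocol and, for TCP/UDP, the
+ * port pair. TCP rules additionally match on cleared FIN/RST flags so
+ * that terminating packets miss the hardware rule and reach software.
+ */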
+static int nf_flow_rule_match(struct nf_flow_match *match,
+ const struct flow_offload_tuple *tuple,
+ struct dst_entry *other_dst)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+ struct ip_tunnel_info *tun_info;
+ bool vlan_encap = false;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ if (other_dst && other_dst->lwtstate) {
+ tun_info = lwt_tun_info(other_dst->lwtstate);
+ nf_flow_rule_lwt_match(match, tun_info);
+ }
+
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
+ key->meta.ingress_ifindex = tuple->tc.iifidx;
+ else
+ key->meta.ingress_ifindex = tuple->iifidx;
+
+ mask->meta.ingress_ifindex = 0xffffffff;
+
+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[0].id,
+ tuple->encap[0].proto);
+ vlan_encap = true;
+ }
+
+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
+ if (vlan_encap) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
+ cvlan);
+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ } else {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
+ vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ }
+ }
+
+ switch (tuple->l3proto) {
+ case AF_INET:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IP);
+ key->ipv4.src = tuple->src_v4.s_addr;
+ mask->ipv4.src = 0xffffffff;
+ key->ipv4.dst = tuple->dst_v4.s_addr;
+ mask->ipv4.dst = 0xffffffff;
+ break;
+ case AF_INET6:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IPV6);
+ key->ipv6.src = tuple->src_v6;
+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
+ key->ipv6.dst = tuple->dst_v6;
+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ mask->control.addr_type = 0xffff;
+ match->dissector.used_keys |= BIT(key->control.addr_type);
+ mask->basic.n_proto = 0xffff;
+
+ switch (tuple->l4proto) {
+ case IPPROTO_TCP:
+ key->tcp.flags = 0;
+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_GRE:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ key->basic.ip_proto = tuple->l4proto;
+ mask->basic.ip_proto = 0xff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC);
+
+ switch (tuple->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ key->tp.src = tuple->src_port;
+ mask->tp.src = 0xffff;
+ key->tp.dst = tuple->dst_port;
+ mask->tp.dst = 0xffff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ break;
+ }
+
+ return 0;
+}
+
+static void flow_offload_mangle(struct flow_action_entry *entry,
+ enum flow_action_mangle_base htype, u32 offset,
+ const __be32 *value, const __be32 *mask)
+{
+ entry->id = FLOW_ACTION_MANGLE;
+ entry->mangle.htype = htype;
+ entry->mangle.offset = offset;
+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
+ memcpy(&entry->mangle.val, value, sizeof(u32));
+}
+
+static inline struct flow_action_entry *
+flow_action_entry_next(struct nf_flow_rule *flow_rule)
+{
+ int i = flow_rule->rule->action.num_entries++;
+
+ return &flow_rule->rule->action.entries[i];
+}
+
+static int flow_offload_eth_src(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
+ struct net_device *dev = NULL;
+ const unsigned char *addr;
+ u32 mask, val;
+ u16 val16;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ addr = this_tuple->out.h_source;
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ dev = dev_get_by_index(net, other_tuple->iifidx);
+ if (!dev)
+ return -ENOENT;
+
+ addr = dev->dev_addr;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mask = ~0xffff0000;
+ memcpy(&val16, addr, 2);
+ val = val16 << 16;
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ &val, &mask);
+
+ mask = ~0xffffffff;
+ memcpy(&val, addr + 2, 4);
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
+ &val, &mask);
+
+ dev_put(dev);
+
+ return 0;
+}
+
+static int flow_offload_eth_dst(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ const struct flow_offload_tuple *other_tuple, *this_tuple;
+ const struct dst_entry *dst_cache;
+ unsigned char ha[ETH_ALEN];
+ struct neighbour *n;
+ const void *daddr;
+ u32 mask, val;
+ u8 nud_state;
+ u16 val16;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ether_addr_copy(ha, this_tuple->out.h_dest);
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ daddr = &other_tuple->src_v4;
+ dst_cache = this_tuple->dst_cache;
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -ENOENT;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -ENOENT;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mask = ~0xffffffff;
+ memcpy(&val, ha, 4);
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
+ &val, &mask);
+
+ mask = ~0x0000ffff;
+ memcpy(&val16, ha + 4, 2);
+ val = val16;
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ &val, &mask);
+
+ return 0;
+}
+
+static void flow_offload_ipv4_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ &addr, &mask);
+}
+
+static void flow_offload_ipv4_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ &addr, &mask);
+}
+
+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+ unsigned int offset,
+ const __be32 *addr, const __be32 *mask)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
+ entry = flow_action_entry_next(flow_rule);
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+ offset + i * sizeof(u32), &addr[i], mask);
+ }
+}
+
+static void flow_offload_ipv6_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const __be32 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+}
+
+static void flow_offload_ipv6_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const __be32 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
+}
+
+static int flow_offload_l4proto(const struct flow_offload *flow)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ u8 type = 0;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
+ break;
+ case IPPROTO_UDP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+static void flow_offload_port_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask, port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ &port, &mask);
+}
+
+static void flow_offload_port_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask, port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ &port, &mask);
+}
+
+static void flow_offload_ipv4_checksum(struct net *net,
+ const struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
+ break;
+ case IPPROTO_UDP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
+ break;
+ }
+}
+
+static void flow_offload_redirect(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *this_tuple, *other_tuple;
+ struct flow_action_entry *entry;
+ struct net_device *dev;
+ int ifindex;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+ switch (this_tuple->xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ this_tuple = &flow->tuplehash[dir].tuple;
+ ifindex = this_tuple->out.hw_ifidx;
+ break;
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ ifindex = other_tuple->iifidx;
+ break;
+ default:
+ return;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return;
+
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = dev;
+}
+
+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *this_tuple;
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ this_tuple = &flow->tuplehash[dir].tuple;
+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+ return;
+
+ dst = this_tuple->dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tun_info;
+ }
+ }
+}
+
+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *other_tuple;
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+ return;
+
+ dst = other_tuple->dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ }
+ }
+}
+
+static int
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *other_tuple;
+ const struct flow_offload_tuple *tuple;
+ int i;
+
+ flow_offload_decap_tunnel(flow, dir, flow_rule);
+ flow_offload_encap_tunnel(flow, dir, flow_rule);
+
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ tuple = &flow->tuplehash[dir].tuple;
+
+ for (i = 0; i < tuple->encap_num; i++) {
+ struct flow_action_entry *entry;
+
+ if (tuple->in_vlan_ingress & BIT(i))
+ continue;
+
+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_VLAN_POP;
+ }
+ }
+
+ other_tuple = &flow->tuplehash[!dir].tuple;
+
+ for (i = 0; i < other_tuple->encap_num; i++) {
+ struct flow_action_entry *entry;
+
+ if (other_tuple->in_vlan_ingress & BIT(i))
+ continue;
+
+ entry = flow_action_entry_next(flow_rule);
+
+ switch (other_tuple->encap[i].proto) {
+ case htons(ETH_P_PPP_SES):
+ entry->id = FLOW_ACTION_PPPOE_PUSH;
+ entry->pppoe.sid = other_tuple->encap[i].id;
+ break;
+ case htons(ETH_P_8021Q):
+ entry->id = FLOW_ACTION_VLAN_PUSH;
+ entry->vlan.vid = other_tuple->encap[i].id;
+ entry->vlan.proto = other_tuple->encap[i].proto;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
+ test_bit(NF_FLOW_DNAT, &flow->flags))
+ flow_offload_ipv4_checksum(net, flow, flow_rule);
+
+ flow_offload_redirect(net, flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
+
+int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+
+ flow_offload_redirect(net, flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
+
+#define NF_FLOW_RULE_ACTION_MAX 16
+
+static struct nf_flow_rule *
+nf_flow_offload_rule_alloc(struct net *net,
+ const struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ const struct nf_flowtable *flowtable = offload->flowtable;
+ const struct flow_offload_tuple *tuple, *other_tuple;
+ struct flow_offload *flow = offload->flow;
+ struct dst_entry *other_dst = NULL;
+ struct nf_flow_rule *flow_rule;
+ int err = -ENOMEM;
+
+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
+ if (!flow_rule)
+ goto err_flow;
+
+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
+ if (!flow_rule->rule)
+ goto err_flow_rule;
+
+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
+ flow_rule->rule->match.mask = &flow_rule->match.mask;
+ flow_rule->rule->match.key = &flow_rule->match.key;
+
+ tuple = &flow->tuplehash[dir].tuple;
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+ other_dst = other_tuple->dst_cache;
+
+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
+ if (err < 0)
+ goto err_flow_match;
+
+ flow_rule->rule->action.num_entries = 0;
+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
+ goto err_flow_match;
+
+ return flow_rule;
+
+err_flow_match:
+ kfree(flow_rule->rule);
+err_flow_rule:
+ kfree(flow_rule);
+err_flow:
+ return NULL;
+}
+
+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
+ entry = &flow_rule->rule->action.entries[i];
+ if (entry->id != FLOW_ACTION_REDIRECT)
+ continue;
+
+ dev_put(entry->dev);
+ }
+ kfree(flow_rule->rule);
+ kfree(flow_rule);
+}
+
+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
+{
+ int i;
+
+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
+ __nf_flow_offload_destroy(flow_rule[i]);
+}
+
+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ struct net *net = read_pnet(&offload->flowtable->net);
+
+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (!flow_rule[0])
+ return -ENOMEM;
+
+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (!flow_rule[1]) {
+ __nf_flow_offload_destroy(flow_rule[0]);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
+ __be16 proto, int priority,
+ enum flow_cls_command cmd,
+ const struct flow_offload_tuple *tuple,
+ struct netlink_ext_ack *extack)
+{
+ cls_flow->common.protocol = proto;
+ cls_flow->common.prio = priority;
+ cls_flow->common.extack = extack;
+ cls_flow->command = cmd;
+ cls_flow->cookie = (unsigned long)tuple;
+}
+
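+/* Pass one command (replace, destroy or stats) to every callback on the
+ * flowtable block; the return value is the number of callbacks that
+ * accepted it, so zero means no driver took the rule.
+ */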
+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
+ struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule,
+ enum flow_offload_tuple_dir dir,
+ int priority, int cmd,
+ struct flow_stats *stats,
+ struct list_head *block_cb_list)
+{
+ struct flow_cls_offload cls_flow = {};
+ struct flow_block_cb *block_cb;
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+ int err, i = 0;
+
+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
+ &flow->tuplehash[dir].tuple, &extack);
+ if (cmd == FLOW_CLS_REPLACE)
+ cls_flow.rule = flow_rule->rule;
+
+ down_read(&flowtable->flow_block_lock);
+ list_for_each_entry(block_cb, block_cb_list, list) {
+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
+ block_cb->cb_priv);
+ if (err < 0)
+ continue;
+
+ i++;
+ }
+ up_read(&flowtable->flow_block_lock);
+
+ if (cmd == FLOW_CLS_STATS)
+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
+
+ return i;
+}
+
+static int flow_offload_tuple_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule,
+ enum flow_offload_tuple_dir dir)
+{
+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
+ flow_rule, dir,
+ offload->flowtable->priority,
+ FLOW_CLS_REPLACE, NULL,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static void flow_offload_tuple_del(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
+ offload->flowtable->priority,
+ FLOW_CLS_DESTROY, NULL,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static int flow_offload_rule_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ int ok_count = 0;
+
+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (ok_count == 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+static void flow_offload_work_add(struct flow_offload_work *offload)
+{
+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
+ int err;
+
+ err = nf_flow_offload_alloc(offload, flow_rule);
+ if (err < 0)
+ return;
+
+ err = flow_offload_rule_add(offload, flow_rule);
+ if (err < 0)
+ goto out;
+
+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+
+out:
+ nf_flow_offload_destroy(flow_rule);
+}
+
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
+}
+
+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir,
+ struct flow_stats *stats)
+{
+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
+ offload->flowtable->priority,
+ FLOW_CLS_STATS, stats,
+ &offload->flowtable->flow_block.cb_list);
+}
+
+static void flow_offload_work_stats(struct flow_offload_work *offload)
+{
+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
+ u64 lastused;
+
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+ &stats[1]);
+
+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
+ lastused + flow_offload_get_timeout(offload->flow));
+
+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
+ if (stats[0].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ stats[0].pkts, stats[0].bytes);
+ if (stats[1].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_REPLY,
+ stats[1].pkts, stats[1].bytes);
+ }
+}
+
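+/* Work handler: runs in process context, dispatches on the queued
+ * command, decrements the per-netns pending counter and releases the
+ * per-flow pending bit so a new request can be scheduled.
+ */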
+static void flow_offload_work_handler(struct work_struct *work)
+{
+ struct flow_offload_work *offload;
+ struct net *net;
+
+ offload = container_of(work, struct flow_offload_work, work);
+ net = read_pnet(&offload->flowtable->net);
+ switch (offload->cmd) {
+ case FLOW_CLS_REPLACE:
+ flow_offload_work_add(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add);
+ break;
+ case FLOW_CLS_DESTROY:
+ flow_offload_work_del(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del);
+ break;
+ case FLOW_CLS_STATS:
+ flow_offload_work_stats(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
+ kfree(offload);
+}
+
+static void flow_offload_queue_work(struct flow_offload_work *offload)
+{
+ struct net *net = read_pnet(&offload->flowtable->net);
+
+ if (offload->cmd == FLOW_CLS_REPLACE) {
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
+ queue_work(nf_flow_offload_add_wq, &offload->work);
+ } else if (offload->cmd == FLOW_CLS_DESTROY) {
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
+ queue_work(nf_flow_offload_del_wq, &offload->work);
+ } else {
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
+ queue_work(nf_flow_offload_stats_wq, &offload->work);
+ }
+}
+
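+/* At most one work item may be in flight per flow: NF_FLOW_HW_PENDING
+ * is taken here and cleared by the work handler once the request has
+ * been processed.
+ */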
+static struct flow_offload_work *
+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
+ struct flow_offload *flow, unsigned int cmd)
+{
+ struct flow_offload_work *offload;
+
+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
+ return NULL;
+
+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
+ if (!offload) {
+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
+ return NULL;
+ }
+
+ offload->cmd = cmd;
+ offload->flow = flow;
+ offload->flowtable = flowtable;
+ INIT_WORK(&offload->work, flow_offload_work_handler);
+
+ return offload;
+}
+
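+/* Entry points called from the flowtable core: they only allocate and
+ * queue a work item; the actual driver calls happen asynchronously.
+ */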
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
+ if (!offload)
+ return;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
+ if (!offload)
+ return;
+
+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+ __s32 delta;
+
+ delta = nf_flow_timeout_delta(flow->timeout);
+	if (delta >= (9 * flow_offload_get_timeout(flow)) / 10)
+ return;
+
+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
+ if (!offload)
+ return;
+
+ flow_offload_queue_work(offload);
+}
+
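+/* Flush pending deletions and run the garbage collector, so that flows
+ * removed from hardware are also released from the software flowtable.
+ */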
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_del_wq);
+ nf_flow_table_gc_run(flowtable);
+ }
+}
+
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_add_wq);
+ flush_workqueue(nf_flow_offload_del_wq);
+ flush_workqueue(nf_flow_offload_stats_wq);
+ }
+}
+
+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ struct flow_block_cb *block_cb, *next;
+ int err = 0;
+
+ down_write(&flowtable->flow_block_lock);
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+ up_write(&flowtable->flow_block_lock);
+
+ return err;
+}
+
+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nf_flowtable *flowtable,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &flowtable->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ bo->cb_list_head = &flowtable->flow_block.cb_list;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
+{
+ struct nf_flowtable *flowtable = block_cb->indr.data;
+ struct net_device *dev = block_cb->indr.dev;
+
+ nf_flow_table_gc_cleanup(flowtable, dev);
+ down_write(&flowtable->flow_block_lock);
+ list_del(&block_cb->list);
+ list_del(&block_cb->driver_list);
+ flow_block_cb_free(block_cb);
+ up_write(&flowtable->flow_block_lock);
+}
+
+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
+
+ return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
+ nf_flow_table_indr_cleanup);
+}
+
+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
+ down_write(&flowtable->flow_block_lock);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
+ up_write(&flowtable->flow_block_lock);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
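+/* Bind or unbind a flowtable block on a device: use ndo_setup_tc() when
+ * the driver implements it, otherwise fall back to the indirect block
+ * infrastructure.
+ */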
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
+ int err;
+
+ if (!nf_flowtable_hw_offload(flowtable))
+ return 0;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
+ else
+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
+ if (err < 0)
+ return err;
+
+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
+
+int nf_flow_table_offload_init(void)
+{
+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_add_wq)
+ return -ENOMEM;
+
+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_del_wq)
+ goto err_del_wq;
+
+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!nf_flow_offload_stats_wq)
+ goto err_stats_wq;
+
+ return 0;
+
+err_stats_wq:
+ destroy_workqueue(nf_flow_offload_del_wq);
+err_del_wq:
+ destroy_workqueue(nf_flow_offload_add_wq);
+ return -ENOMEM;
+}
+
+void nf_flow_table_offload_exit(void)
+{
+ destroy_workqueue(nf_flow_offload_add_wq);
+ destroy_workqueue(nf_flow_offload_del_wq);
+ destroy_workqueue(nf_flow_offload_stats_wq);
+}
diff --git a/net/netfilter/nf_flow_table_procfs.c b/net/netfilter/nf_flow_table_procfs.c
new file mode 100644
index 000000000..159b033a4
--- /dev/null
+++ b/net/netfilter/nf_flow_table_procfs.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <net/netfilter/nf_flow_table.h>
+
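+/* /proc/net/stat/nf_flowtable: one line of offload workqueue counters
+ * per possible CPU, walked with the usual seq_file callbacks.
+ */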
+static void *nf_flow_table_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ft.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *nf_flow_table_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ft.stat, cpu);
+ }
+ (*pos)++;
+ return NULL;
+}
+
+static void nf_flow_table_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int nf_flow_table_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ const struct nf_flow_table_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "wq_add wq_del wq_stats\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%8d %8d %8d\n",
+ st->count_wq_add,
+ st->count_wq_del,
+		   st->count_wq_stats);
+ return 0;
+}
+
+static const struct seq_operations nf_flow_table_cpu_seq_ops = {
+ .start = nf_flow_table_cpu_seq_start,
+ .next = nf_flow_table_cpu_seq_next,
+ .stop = nf_flow_table_cpu_seq_stop,
+ .show = nf_flow_table_cpu_seq_show,
+};
+
+int nf_flow_table_init_proc(struct net *net)
+{
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_net("nf_flowtable", 0444, net->proc_net_stat,
+ &nf_flow_table_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+ return pde ? 0 : -ENOMEM;
+}
+
+void nf_flow_table_fini_proc(struct net *net)
+{
+ remove_proc_entry("nf_flowtable", net->proc_net_stat);
+}
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
new file mode 100644
index 000000000..00e89ffd7
--- /dev/null
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sysctl.h>
+#include <net/lwtunnel.h>
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+
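+/* The static key is one-way: once netfilter hooks on lightweight tunnel
+ * output are enabled, the sysctl handler below refuses to clear it
+ * again and returns -EBUSY instead.
+ */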
+static inline int nf_hooks_lwtunnel_get(void)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return 1;
+ else
+ return 0;
+}
+
+static inline int nf_hooks_lwtunnel_set(int enable)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
+ if (!enable)
+ return -EBUSY;
+ } else if (enable) {
+ static_branch_enable(&nf_hooks_lwtunnel_enabled);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int proc_nf_hooks_lwtunnel_enabled = 0;
+ struct ctl_table tmp = {
+ .procname = table->procname,
+ .data = &proc_nf_hooks_lwtunnel_enabled,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+ int ret;
+
+ if (!write)
+ proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0)
+ ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
new file mode 100644
index 000000000..832ae6417
--- /dev/null
+++ b/net/netfilter/nf_internals.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NF_INTERNALS_H
+#define _NF_INTERNALS_H
+
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+/* nf_conntrack_netlink.c: applied on tuple filters */
+#define CTA_FILTER_F_CTA_IP_SRC (1 << 0)
+#define CTA_FILTER_F_CTA_IP_DST (1 << 1)
+#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2)
+#define CTA_FILTER_F_CTA_PROTO_NUM (1 << 3)
+#define CTA_FILTER_F_CTA_PROTO_SRC_PORT (1 << 4)
+#define CTA_FILTER_F_CTA_PROTO_DST_PORT (1 << 5)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_TYPE (1 << 6)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_CODE (1 << 7)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_ID (1 << 8)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE (1 << 9)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE (1 << 10)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_ID (1 << 11)
+#define CTA_FILTER_F_MAX (1 << 12)
+#define CTA_FILTER_F_ALL (CTA_FILTER_F_MAX-1)
+#define CTA_FILTER_FLAG(ctattr) CTA_FILTER_F_ ## ctattr
+
+/* nf_queue.c */
+void nf_queue_nf_hook_drop(struct net *net);
+
+/* nf_log.c */
+int __init netfilter_log_init(void);
+
+/* core.c */
+void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
+ const struct nf_hook_ops *reg);
+int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp,
+ const struct nf_hook_ops *reg);
+#endif /* _NF_INTERNALS_H */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
new file mode 100644
index 000000000..8a2929014
--- /dev/null
+++ b/net/netfilter/nf_log.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_log.h>
+
+#include "nf_internals.h"
+
+/* Internal logging interface, which relies on the real
+ * LOG target modules.
+ */
+
+#define NFLOGGER_NAME_LEN 64
+
+int sysctl_nf_log_all_netns __read_mostly;
+EXPORT_SYMBOL(sysctl_nf_log_all_netns);
+
+static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+static DEFINE_MUTEX(nf_log_mutex);
+
+#define nft_log_dereference(logger) \
+ rcu_dereference_protected(logger, lockdep_is_held(&nf_log_mutex))
+
+static struct nf_logger *__find_logger(int pf, const char *str_logger)
+{
+ struct nf_logger *log;
+ int i;
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[pf][i] == NULL)
+ continue;
+
+ log = nft_log_dereference(loggers[pf][i]);
+ if (!strncasecmp(str_logger, log->name, strlen(log->name)))
+ return log;
+ }
+
+ return NULL;
+}
+
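+/* Set the default logger for a protocol family in this netns, but only
+ * if no logger is bound to it yet; an existing binding stays in place.
+ */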
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+{
+ const struct nf_logger *log;
+
+ if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&nf_log_mutex);
+ log = nft_log_dereference(net->nf.nf_loggers[pf]);
+ if (log == NULL)
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
+
+ mutex_unlock(&nf_log_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(nf_log_set);
+
+void nf_log_unset(struct net *net, const struct nf_logger *logger)
+{
+ int i;
+ const struct nf_logger *log;
+
+ mutex_lock(&nf_log_mutex);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ log = nft_log_dereference(net->nf.nf_loggers[i]);
+ if (log == logger)
+ RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
+ }
+ mutex_unlock(&nf_log_mutex);
+}
+EXPORT_SYMBOL(nf_log_unset);
+
+/* return -EEXIST if the same logger is already registered, 0 on success. */
+int nf_log_register(u_int8_t pf, struct nf_logger *logger)
+{
+ int i;
+ int ret = 0;
+
+ if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
+ return -EINVAL;
+
+ mutex_lock(&nf_log_mutex);
+
+ if (pf == NFPROTO_UNSPEC) {
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
+ if (rcu_access_pointer(loggers[i][logger->type])) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
+ rcu_assign_pointer(loggers[i][logger->type], logger);
+ } else {
+ if (rcu_access_pointer(loggers[pf][logger->type])) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ rcu_assign_pointer(loggers[pf][logger->type], logger);
+ }
+
+unlock:
+ mutex_unlock(&nf_log_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(nf_log_register);
+
+void nf_log_unregister(struct nf_logger *logger)
+{
+ const struct nf_logger *log;
+ int i;
+
+ mutex_lock(&nf_log_mutex);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ log = nft_log_dereference(loggers[i][logger->type]);
+ if (log == logger)
+ RCU_INIT_POINTER(loggers[i][logger->type], NULL);
+ }
+ mutex_unlock(&nf_log_mutex);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_log_unregister);
+
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+ const struct nf_logger *logger)
+{
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return -EINVAL;
+ mutex_lock(&nf_log_mutex);
+ if (__find_logger(pf, logger->name) == NULL) {
+ mutex_unlock(&nf_log_mutex);
+ return -ENOENT;
+ }
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
+ mutex_unlock(&nf_log_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(nf_log_bind_pf);
+
+void nf_log_unbind_pf(struct net *net, u_int8_t pf)
+{
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return;
+ mutex_lock(&nf_log_mutex);
+ RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
+ mutex_unlock(&nf_log_mutex);
+}
+EXPORT_SYMBOL(nf_log_unbind_pf);
+
+int nf_logger_find_get(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+ int ret = -ENOENT;
+
+ if (pf == NFPROTO_INET) {
+ ret = nf_logger_find_get(NFPROTO_IPV4, type);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_logger_find_get(NFPROTO_IPV6, type);
+ if (ret < 0) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ return ret;
+ }
+
+ return 0;
+ }
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ if (logger == NULL)
+ goto out;
+
+ if (try_module_get(logger->me))
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_logger_find_get);
+
+void nf_logger_put(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+
+ if (pf == NFPROTO_INET) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ nf_logger_put(NFPROTO_IPV6, type);
+ return;
+ }
+
+ BUG_ON(loggers[pf][type] == NULL);
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ module_put(logger->me);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_logger_put);
+
+void nf_log_packet(struct net *net,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *fmt, ...)
+{
+ va_list args;
+ char prefix[NF_LOG_PREFIXLEN];
+ const struct nf_logger *logger;
+
+ rcu_read_lock();
+ if (loginfo != NULL)
+ logger = rcu_dereference(loggers[pf][loginfo->type]);
+ else
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
+
+ if (logger) {
+ va_start(args, fmt);
+ vsnprintf(prefix, sizeof(prefix), fmt, args);
+ va_end(args);
+ logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_packet);
+
+void nf_log_trace(struct net *net,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *fmt, ...)
+{
+ va_list args;
+ char prefix[NF_LOG_PREFIXLEN];
+ const struct nf_logger *logger;
+
+ rcu_read_lock();
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
+ if (logger) {
+ va_start(args, fmt);
+ vsnprintf(prefix, sizeof(prefix), fmt, args);
+ va_end(args);
+ logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_trace);
+
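+/* Buffer for assembling one log line. nf_log_buf_open() normally uses
+ * kmalloc(GFP_ATOMIC); if that fails, a single static emergency buffer
+ * is claimed via xchg() with softirqs disabled until it is closed.
+ */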
+#define S_SIZE (1024 - (sizeof(unsigned int) + 1))
+
+struct nf_log_buf {
+ unsigned int count;
+ char buf[S_SIZE + 1];
+};
+
+static struct nf_log_buf emergency, *emergency_ptr = &emergency;
+
+__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...)
+{
+ va_list args;
+ int len;
+
+ if (likely(m->count < S_SIZE)) {
+ va_start(args, f);
+ len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args);
+ va_end(args);
+ if (likely(m->count + len < S_SIZE)) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = S_SIZE;
+ printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n");
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_add);
+
+struct nf_log_buf *nf_log_buf_open(void)
+{
+ struct nf_log_buf *m = kmalloc(sizeof(*m), GFP_ATOMIC);
+
+ if (unlikely(!m)) {
+ local_bh_disable();
+ do {
+ m = xchg(&emergency_ptr, NULL);
+ } while (!m);
+ }
+ m->count = 0;
+ return m;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_open);
+
+void nf_log_buf_close(struct nf_log_buf *m)
+{
+ m->buf[m->count] = 0;
+ printk("%s\n", m->buf);
+
+ if (likely(m != &emergency))
+ kfree(m);
+ else {
+ emergency_ptr = m;
+ local_bh_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_close);
+
+#ifdef CONFIG_PROC_FS
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+
+ mutex_lock(&nf_log_mutex);
+
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
+ return NULL;
+
+ return pos;
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(s);
+
+ (*pos)++;
+
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
+ return NULL;
+
+ return pos;
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+{
+ mutex_unlock(&nf_log_mutex);
+}
+
+static int seq_show(struct seq_file *s, void *v)
+{
+ loff_t *pos = v;
+ const struct nf_logger *logger;
+ int i;
+ struct net *net = seq_file_net(s);
+
+ logger = nft_log_dereference(net->nf.nf_loggers[*pos]);
+
+ if (!logger)
+ seq_printf(s, "%2lld NONE (", *pos);
+ else
+ seq_printf(s, "%2lld %s (", *pos, logger->name);
+
+ if (seq_has_overflowed(s))
+ return -ENOSPC;
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[*pos][i] == NULL)
+ continue;
+
+ logger = nft_log_dereference(loggers[*pos][i]);
+ seq_puts(s, logger->name);
+ if (i == 0 && loggers[*pos][i + 1] != NULL)
+ seq_puts(s, ",");
+
+ if (seq_has_overflowed(s))
+ return -ENOSPC;
+ }
+
+ seq_puts(s, ")\n");
+
+ if (seq_has_overflowed(s))
+ return -ENOSPC;
+ return 0;
+}
+
+static const struct seq_operations nflog_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SYSCTL
+static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
+static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
+static struct ctl_table_header *nf_log_sysctl_fhdr;
+
+static struct ctl_table nf_log_sysctl_ftable[] = {
+ {
+ .procname = "nf_log_all_netns",
+ .data = &sysctl_nf_log_all_netns,
+ .maxlen = sizeof(sysctl_nf_log_all_netns),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static int nf_log_proc_dostring(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ const struct nf_logger *logger;
+ char buf[NFLOGGER_NAME_LEN];
+ int r = 0;
+ int tindex = (unsigned long)table->extra1;
+ struct net *net = table->extra2;
+
+ if (write) {
+ struct ctl_table tmp = *table;
+
+ /* proc_dostring() can append to existing strings, so we need to
+ * initialize it as an empty string.
+ */
+ buf[0] = '\0';
+ tmp.data = buf;
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
+ if (r)
+ return r;
+
+ if (!strcmp(buf, "NONE")) {
+ nf_log_unbind_pf(net, tindex);
+ return 0;
+ }
+ mutex_lock(&nf_log_mutex);
+ logger = __find_logger(tindex, buf);
+ if (logger == NULL) {
+ mutex_unlock(&nf_log_mutex);
+ return -ENOENT;
+ }
+ rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
+ mutex_unlock(&nf_log_mutex);
+ } else {
+ struct ctl_table tmp = *table;
+
+ tmp.data = buf;
+ mutex_lock(&nf_log_mutex);
+ logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
+ if (!logger)
+ strscpy(buf, "NONE", sizeof(buf));
+ else
+ strscpy(buf, logger->name, sizeof(buf));
+ mutex_unlock(&nf_log_mutex);
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
+ }
+
+ return r;
+}
+
+static int netfilter_log_sysctl_init(struct net *net)
+{
+ int i;
+ struct ctl_table *table;
+
+ table = nf_log_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_log_sysctl_table,
+ sizeof(nf_log_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ } else {
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
+ snprintf(nf_log_sysctl_fnames[i],
+ 3, "%d", i);
+ nf_log_sysctl_table[i].procname =
+ nf_log_sysctl_fnames[i];
+ nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
+ nf_log_sysctl_table[i].mode = 0644;
+ nf_log_sysctl_table[i].proc_handler =
+ nf_log_proc_dostring;
+ nf_log_sysctl_table[i].extra1 =
+ (void *)(unsigned long) i;
+ }
+ nf_log_sysctl_fhdr = register_net_sysctl(net, "net/netfilter",
+ nf_log_sysctl_ftable);
+ if (!nf_log_sysctl_fhdr)
+ goto err_freg;
+ }
+
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
+ table[i].extra2 = net;
+
+ net->nf.nf_log_dir_header = register_net_sysctl(net,
+ "net/netfilter/nf_log",
+ table);
+ if (!net->nf.nf_log_dir_header)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ else
+ unregister_net_sysctl_table(nf_log_sysctl_fhdr);
+err_freg:
+err_alloc:
+ return -ENOMEM;
+}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->nf.nf_log_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_log_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ else
+ unregister_net_sysctl_table(nf_log_sysctl_fhdr);
+}
+#else
+static int netfilter_log_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+static int __net_init nf_log_net_init(struct net *net)
+{
+ int ret = -ENOMEM;
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("nf_log", 0444, net->nf.proc_netfilter,
+ &nflog_seq_ops, sizeof(struct seq_net_private)))
+ return ret;
+#endif
+ ret = netfilter_log_sysctl_init(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+#endif
+ return ret;
+}
+
+static void __net_exit nf_log_net_exit(struct net *net)
+{
+ netfilter_log_sysctl_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+#endif
+}
+
+static struct pernet_operations nf_log_net_ops = {
+ .init = nf_log_net_init,
+ .exit = nf_log_net_exit,
+};
+
+int __init netfilter_log_init(void)
+{
+ return register_pernet_subsys(&nf_log_net_ops);
+}
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
new file mode 100644
index 000000000..584022260
--- /dev/null
+++ b/net/netfilter/nf_log_syslog.c
@@ -0,0 +1,1083 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static const struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = LOGLEVEL_NOTICE,
+ .logflags = NF_LOG_DEFAULT_MASK,
+ },
+ },
+};
+
+struct arppayload {
+ unsigned char mac_src[ETH_ALEN];
+ unsigned char ip_src[4];
+ unsigned char mac_dst[ETH_ALEN];
+ unsigned char ip_dst[4];
+};
+
+/* Guard against containers flooding syslog. */
+static bool nf_log_allowed(const struct net *net)
+{
+ return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
+static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+ u16 vid;
+
+ if (!skb_vlan_tag_present(skb))
+ return;
+
+ vid = skb_vlan_tag_get(skb);
+ nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
+
+static void noinline_for_stack
+dump_arp_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int nhoff)
+{
+ const struct arppayload *ap;
+ struct arppayload _arpp;
+ const struct arphdr *ah;
+ unsigned int logflags;
+ struct arphdr _arph;
+
+ ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph);
+ if (!ah) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ if (logflags & NF_LOG_MACDECODE) {
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
+ }
+
+ nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
+ /* If it's for Ethernet and the lengths are OK, then log the ARP
+ * payload.
+ */
+ if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
+ ah->ar_hln != ETH_ALEN ||
+ ah->ar_pln != sizeof(__be32))
+ return;
+
+ ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp);
+ if (!ap) {
+ nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
+ skb->len - sizeof(_arph));
+ return;
+ }
+ nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
+ ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+}
+
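+/* Prefix shared by all families: log level, user-supplied prefix,
+ * in/out device names and, with bridge netfilter, the physical ports.
+ */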
+static void
+nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *prefix,
+ struct net *net)
+{
+ const struct net_device *physoutdev __maybe_unused;
+ const struct net_device *physindev __maybe_unused;
+
+ nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
+ '0' + loginfo->u.log.level, prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ physindev = nf_bridge_get_physindev(skb, net);
+ if (physindev && in != physindev)
+ nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev && out != physoutdev)
+ nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
+#endif
+}
+
+static void nf_log_arp_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix, net);
+ dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_arp_logger __read_mostly = {
+ .name = "nf_log_arp",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_arp_packet,
+ .me = THIS_MODULE,
+};
+
+static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+ struct sock *sk)
+{
+ if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
+ return;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ const struct cred *cred = sk->sk_socket->file->f_cred;
+
+ nf_log_buf_add(m, "UID=%u GID=%u ",
+ from_kuid_munged(&init_user_ns, cred->fsuid),
+ from_kgid_munged(&init_user_ns, cred->fsgid));
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
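+/* The "Max length" annotations below track the worst-case size of each
+ * field so that a complete line provably fits into the nf_log_buf.
+ */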
+static noinline_for_stack int
+nf_log_dump_tcp_header(struct nf_log_buf *m,
+ const struct sk_buff *skb,
+ u8 proto, int fragment,
+ unsigned int offset,
+ unsigned int logflags)
+{
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+ nf_log_buf_add(m, "PROTO=TCP ");
+
+ if (fragment)
+ return 0;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+ if (!th) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+ return 1;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (logflags & NF_LOG_TCPSEQ) {
+ nf_log_buf_add(m, "SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ }
+
+ /* Max length: 13 "WINDOW=65535 " */
+ nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3C " */
+ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
+ TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+ nf_log_buf_add(m, "CWR ");
+ if (th->ece)
+ nf_log_buf_add(m, "ECE ");
+ if (th->urg)
+ nf_log_buf_add(m, "URG ");
+ if (th->ack)
+ nf_log_buf_add(m, "ACK ");
+ if (th->psh)
+ nf_log_buf_add(m, "PSH ");
+ if (th->rst)
+ nf_log_buf_add(m, "RST ");
+ if (th->syn)
+ nf_log_buf_add(m, "SYN ");
+ if (th->fin)
+ nf_log_buf_add(m, "FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
+
+ if ((logflags & NF_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) {
+ unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr);
+ u8 _opt[60 - sizeof(struct tcphdr)];
+ unsigned int i;
+ const u8 *op;
+
+ op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
+ optsize, _opt);
+ if (!op) {
+ nf_log_buf_add(m, "OPT (TRUNCATED)");
+ return 1;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+
+ nf_log_buf_add(m, ") ");
+ }
+
+ return 0;
+}
+
+static noinline_for_stack int
+nf_log_dump_udp_header(struct nf_log_buf *m,
+ const struct sk_buff *skb,
+ u8 proto, int fragment,
+ unsigned int offset)
+{
+ struct udphdr _udph;
+ const struct udphdr *uh;
+
+ if (proto == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ nf_log_buf_add(m, "PROTO=UDP ");
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ nf_log_buf_add(m, "PROTO=UDPLITE ");
+
+ if (fragment)
+ goto out;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (!uh) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+
+ return 1;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len));
+
+out:
+ return 0;
+}
+
+/* One level of recursion won't kill us */
+static noinline_for_stack void
+dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int iphoff)
+{
+ const struct iphdr *ih;
+ unsigned int logflags;
+ struct iphdr _iph;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (!ih) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options.
+ * Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 "
+ */
+ nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+ nf_log_buf_add(m, "CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+ nf_log_buf_add(m, "DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+ nf_log_buf_add(m, "MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ if ((logflags & NF_LOG_IPOPT) &&
+ ih->ihl * 4 > sizeof(struct iphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
+ const unsigned char *op;
+ unsigned int i, optsize;
+
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff + sizeof(_iph),
+ optsize, _opt);
+ if (!op) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+ nf_log_buf_add(m, ") ");
+ }
+
+ switch (ih->protocol) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff + ih->ihl * 4, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff + ih->ihl * 4))
+ return;
+ break;
+ case IPPROTO_ICMP: {
+ static const size_t required_len[NR_ICMP_TYPES + 1] = {
+ [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH] = 8 + sizeof(struct iphdr),
+ [ICMP_SOURCE_QUENCH] = 8 + sizeof(struct iphdr),
+ [ICMP_REDIRECT] = 8 + sizeof(struct iphdr),
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED] = 8 + sizeof(struct iphdr),
+ [ICMP_PARAMETERPROB] = 8 + sizeof(struct iphdr),
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+ const struct icmphdr *ich;
+ struct icmphdr _icmph;
+
+ /* Max length: 11 "PROTO=ICMP " */
+ nf_log_buf_add(m, "PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (!ich) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl * 4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES &&
+ required_len[ich->type] &&
+ skb->len - iphoff - ih->ihl * 4 < required_len[ich->type]) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl * 4);
+ break;
+ }
+
+ switch (ich->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ nf_log_buf_add(m, "PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
+ fallthrough;
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+ nf_log_buf_add(m, "[");
+ dump_ipv4_packet(net, m, info, skb,
+ iphoff + ih->ihl * 4 + sizeof(_icmph));
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH &&
+ ich->code == ICMP_FRAG_NEEDED) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohs(ich->un.frag.mtu));
+ }
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH: {
+ const struct ip_auth_hdr *ah;
+ struct ip_auth_hdr _ahdr;
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+ nf_log_buf_add(m, "PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_ahdr), &_ahdr);
+ if (!ah) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl * 4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ const struct ip_esp_hdr *eh;
+ struct ip_esp_hdr _esph;
+
+ /* Max length: 10 "PROTO=ESP " */
+ nf_log_buf_add(m, "PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ eh = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_esph), &_esph);
+ if (!eh) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl * 4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & NF_LOG_UID) && !iphoff)
+ nf_log_dump_sk_uid_gid(net, m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!iphoff && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,20) = 35 */
+ /* UDPLITE: 14+max(25,20) = 39 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
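+/* Walk the extension header chain before dumping the transport header;
+ * the 'recurse' flag limits dumping of ICMPv6-embedded packets to one
+ * level.
+ */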
+static noinline_for_stack void
+dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int ip6hoff,
+ int recurse)
+{
+ const struct ipv6hdr *ih;
+ unsigned int hdrlen = 0;
+ unsigned int logflags;
+ struct ipv6hdr _ip6h;
+ unsigned int ptr;
+ u8 currenthdr;
+ int fragment;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+ if (!ih) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+ nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+ ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+ (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
+ ih->hop_limit,
+ (ntohl(*(__be32 *)ih) & 0x000fffff));
+
+ fragment = 0;
+ ptr = ip6hoff + sizeof(struct ipv6hdr);
+ currenthdr = ih->nexthdr;
+ while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
+ struct ipv6_opt_hdr _hdr;
+ const struct ipv6_opt_hdr *hp;
+
+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+ if (!hp) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 48 "OPT (...) " */
+ if (logflags & NF_LOG_IPOPT)
+ nf_log_buf_add(m, "OPT ( ");
+
+ switch (currenthdr) {
+ case IPPROTO_FRAGMENT: {
+ struct frag_hdr _fhdr;
+ const struct frag_hdr *fh;
+
+ nf_log_buf_add(m, "FRAG:");
+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+ &_fhdr);
+ if (!fh) {
+ nf_log_buf_add(m, "TRUNCATED ");
+ return;
+ }
+
+ /* Max length: 6 "65535 " */
+ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+ /* Max length: 11 "INCOMPLETE " */
+ if (fh->frag_off & htons(0x0001))
+ nf_log_buf_add(m, "INCOMPLETE ");
+
+ nf_log_buf_add(m, "ID:%08x ",
+ ntohl(fh->identification));
+
+ if (ntohs(fh->frag_off) & 0xFFF8)
+ fragment = 1;
+
+ hdrlen = 8;
+ break;
+ }
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_HOPOPTS:
+ if (fragment) {
+ if (logflags & NF_LOG_IPOPT)
+ nf_log_buf_add(m, ")");
+ return;
+ }
+ hdrlen = ipv6_optlen(hp);
+ break;
+ /* Max Length */
+ case IPPROTO_AH:
+ if (logflags & NF_LOG_IPOPT) {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ /* Max length: 3 "AH " */
+ nf_log_buf_add(m, "AH ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+ &_ahdr);
+ if (!ah) {
+ /* Max length: 26 "INCOMPLETE [65535 bytes] )" */
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+				/* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+ }
+
+ hdrlen = ipv6_authlen(hp);
+ break;
+ case IPPROTO_ESP:
+ if (logflags & NF_LOG_IPOPT) {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 4 "ESP " */
+ nf_log_buf_add(m, "ESP ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ /* Max length: 26 "INCOMPLETE [65535 bytes] )" */
+ eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+ &_esph);
+ if (!eh) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 16 "SPI=0xF1234567 )" */
+ nf_log_buf_add(m, "SPI=0x%x )",
+ ntohl(eh->spi));
+ }
+ return;
+ default:
+ /* Max length: 20 "Unknown Ext Hdr 255" */
+ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
+ return;
+ }
+ if (logflags & NF_LOG_IPOPT)
+ nf_log_buf_add(m, ") ");
+
+ currenthdr = hp->nexthdr;
+ ptr += hdrlen;
+ }
+
+ switch (currenthdr) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
+ ptr, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
+ return;
+ break;
+ case IPPROTO_ICMPV6: {
+ struct icmp6hdr _icmp6h;
+ const struct icmp6hdr *ic;
+
+ /* Max length: 13 "PROTO=ICMPv6 " */
+ nf_log_buf_add(m, "PROTO=ICMPv6 ");
+
+ if (fragment)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+ if (!ic) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ",
+ ic->icmp6_type, ic->icmp6_code);
+
+ switch (ic->icmp6_type) {
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ic->icmp6_identifier),
+ ntohs(ic->icmp6_sequence));
+ break;
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ break;
+
+ case ICMPV6_PARAMPROB:
+ /* Max length: 17 "POINTER=ffffffff " */
+ nf_log_buf_add(m, "POINTER=%08x ",
+ ntohl(ic->icmp6_pointer));
+ fallthrough;
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_PKT_TOOBIG:
+ case ICMPV6_TIME_EXCEED:
+ /* Max length: 3+maxlen */
+ if (recurse) {
+ nf_log_buf_add(m, "[");
+ dump_ipv6_packet(net, m, info, skb,
+ ptr + sizeof(_icmp6h), 0);
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohl(ic->icmp6_mtu));
+ }
+ }
+ break;
+ }
+ /* Max length: 10 "PROTO=255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", currenthdr);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & NF_LOG_UID) && recurse)
+ nf_log_dump_sk_uid_gid(net, m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (recurse && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+}
+
+static void dump_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ unsigned int logflags = 0;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+
+ if (!(logflags & NF_LOG_MACDECODE))
+ goto fallback;
+
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
+ return;
+ default:
+ break;
+ }
+
+fallback:
+ nf_log_buf_add(m, "MAC=");
+ if (dev->hard_header_len &&
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
+ unsigned int i;
+
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+
+ nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+ &iph->daddr);
+ }
+ }
+ nf_log_buf_add(m, " ");
+}
+
+static void nf_log_ip_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in,
+ out, loginfo, prefix, net);
+
+ if (in)
+ dump_mac_header(m, loginfo, skb);
+
+ dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb));
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip_logger __read_mostly = {
+ .name = "nf_log_ipv4",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip_packet,
+ .me = THIS_MODULE,
+};
+
+static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
+ loginfo, prefix, net);
+
+ if (in)
+ dump_mac_header(m, loginfo, skb);
+
+ dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip6_logger __read_mostly = {
+ .name = "nf_log_ipv6",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip6_packet,
+ .me = THIS_MODULE,
+};
+
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix, net);
+
+ dump_mac_header(m, loginfo, skb);
+
+ nf_log_buf_close(m);
+}
+
+static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ nf_log_ip_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ break;
+ case htons(ETH_P_IPV6):
+ nf_log_ip6_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ break;
+ default:
+ nf_log_unknown_packet(net, pf, hooknum, skb,
+ in, out, loginfo, prefix);
+ break;
+ }
+}
+
+static struct nf_logger nf_netdev_logger __read_mostly = {
+ .name = "nf_log_netdev",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_netdev_packet,
+ .me = THIS_MODULE,
+};
+
+static struct nf_logger nf_bridge_logger __read_mostly = {
+ .name = "nf_log_bridge",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_netdev_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_syslog_net_init(struct net *net)
+{
+ int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+
+ if (ret)
+ return ret;
+
+ ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
+ if (ret)
+ goto err1;
+
+ ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
+ if (ret)
+ goto err2;
+
+ ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
+ if (ret)
+ goto err3;
+
+ ret = nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
+ if (ret)
+ goto err4;
+ return 0;
+err4:
+ nf_log_unset(net, &nf_netdev_logger);
+err3:
+ nf_log_unset(net, &nf_ip6_logger);
+err2:
+ nf_log_unset(net, &nf_arp_logger);
+err1:
+ nf_log_unset(net, &nf_ip_logger);
+ return ret;
+}
+
+static void __net_exit nf_log_syslog_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_ip_logger);
+ nf_log_unset(net, &nf_arp_logger);
+ nf_log_unset(net, &nf_ip6_logger);
+ nf_log_unset(net, &nf_netdev_logger);
+ nf_log_unset(net, &nf_bridge_logger);
+}
+
+static struct pernet_operations nf_log_syslog_net_ops = {
+ .init = nf_log_syslog_net_init,
+ .exit = nf_log_syslog_net_exit,
+};
+
+static int __init nf_log_syslog_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_syslog_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
+ if (ret < 0)
+ goto err1;
+
+ ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger);
+ if (ret < 0)
+ goto err2;
+
+ ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
+ if (ret < 0)
+ goto err3;
+
+ ret = nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
+ if (ret < 0)
+ goto err4;
+
+ ret = nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger);
+ if (ret < 0)
+ goto err5;
+
+ return 0;
+err5:
+ nf_log_unregister(&nf_netdev_logger);
+err4:
+ nf_log_unregister(&nf_ip6_logger);
+err3:
+ nf_log_unregister(&nf_arp_logger);
+err2:
+ nf_log_unregister(&nf_ip_logger);
+err1:
+ pr_err("failed to register logger\n");
+ unregister_pernet_subsys(&nf_log_syslog_net_ops);
+ return ret;
+}
+
+static void __exit nf_log_syslog_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_syslog_net_ops);
+ nf_log_unregister(&nf_ip_logger);
+ nf_log_unregister(&nf_arp_logger);
+ nf_log_unregister(&nf_ip6_logger);
+ nf_log_unregister(&nf_netdev_logger);
+ nf_log_unregister(&nf_bridge_logger);
+}
+
+module_init(nf_log_syslog_init);
+module_exit(nf_log_syslog_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter syslog packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf_log_arp");
+MODULE_ALIAS("nf_log_bridge");
+MODULE_ALIAS("nf_log_ipv4");
+MODULE_ALIAS("nf_log_ipv6");
+MODULE_ALIAS("nf_log_netdev");
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0);
+MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
+MODULE_ALIAS_NF_LOGGER(3, 0); /* NFPROTO_ARP */
+MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
new file mode 100644
index 000000000..98deef6cd
--- /dev/null
+++ b/net/netfilter/nf_nat_amanda.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Amanda extension for TCP NAT alteration.
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on a copy of HW's ip_nat_irc.c as well as other modules
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+#define NAT_HELPER_NAME "amanda"
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_amanda =
+ NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
+static unsigned int help(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+{
+ char buffer[sizeof("65535")];
+ u_int16_t port;
+
+ /* Connection comes from client. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_ORIGINAL;
+
+ /* When you see the packet, we need to NAT it the same as
+ * this one (ie. same IP: it will be TCP and master is UDP). */
+ exp->expectfn = nf_nat_follow_master;
+
+ /* Try to get same port: if not, try to change it. */
+ port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port));
+ if (port == 0) {
+ nf_ct_helper_log(skb, exp->master, "all ports in use");
+ return NF_DROP;
+ }
+
+ sprintf(buffer, "%u", port);
+ if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
+ protoff, matchoff, matchlen,
+ buffer, strlen(buffer))) {
+ nf_ct_helper_log(skb, exp->master, "cannot mangle packet");
+ nf_ct_unexpect_related(exp);
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+static void __exit nf_nat_amanda_fini(void)
+{
+ nf_nat_helper_unregister(&nat_helper_amanda);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, NULL);
+ synchronize_rcu();
+}
+
+static int __init nf_nat_amanda_init(void)
+{
+ BUG_ON(nf_nat_amanda_hook != NULL);
+ nf_nat_helper_register(&nat_helper_amanda);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, help);
+ return 0;
+}
+
+module_init(nf_nat_amanda_init);
+module_exit(nf_nat_amanda_fini);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
new file mode 100644
index 000000000..0fa5a0bbb
--- /dev/null
+++ b/net/netfilter/nf_nat_bpf.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable NAT Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_nat BTF");
+
+/* bpf_ct_set_nat_info - Set source or destination nat address
+ *
+ * Set source or destination nat address of the newly allocated
+ * nf_conn before insertion. This must be invoked for referenced
+ * PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @addr - Nat source/destination address
+ * @port - Nat source/destination port. Non-positive values are
+ * interpreted as a request to select a random port.
+ * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
+ */
+int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+ union nf_inet_addr *addr, int port,
+ enum nf_nat_manip_type manip)
+{
+ struct nf_conn *ct = (struct nf_conn *)nfct;
+ u16 proto = nf_ct_l3num(ct);
+ struct nf_nat_range2 range;
+
+ if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6)
+ return -EINVAL;
+
+ memset(&range, 0, sizeof(struct nf_nat_range2));
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = *addr;
+ range.max_addr = range.min_addr;
+ if (port > 0) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto.all = cpu_to_be16(port);
+ range.max_proto.all = range.min_proto.all;
+ }
+
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
+}
+
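+/* Illustrative caller sketch (not part of this file): a BPF program
+ * would pair this kfunc with the conntrack kfuncs from
+ * nf_conntrack_bpf.c, roughly as below. The tuple/opts setup and the
+ * address and port values are made-up placeholders.
+ *
+ *   struct nf_conn___init *ct_init;
+ *   struct nf_conn *ct;
+ *   union nf_inet_addr addr = { .ip = bpf_htonl(0xc0a80101) };
+ *
+ *   ct_init = bpf_xdp_ct_alloc(xdp, &tup, sizeof(tup), &opts, sizeof(opts));
+ *   if (ct_init) {
+ *       bpf_ct_set_nat_info(ct_init, &addr, 4000, NF_NAT_MANIP_SRC);
+ *       ct = bpf_ct_insert_entry(ct_init);
+ *       if (ct)
+ *           bpf_ct_release(ct);
+ *   }
+ */
+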
+__diag_pop()
+
+BTF_SET8_START(nf_nat_kfunc_set)
+BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_nat_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_nat_kfunc_set,
+};
+
+int register_nf_nat_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &nf_bpf_nat_kfunc_set);
+ if (ret)
+ return ret;
+
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &nf_bpf_nat_kfunc_set);
+}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
new file mode 100644
index 000000000..e29e4ccb5
--- /dev/null
+++ b/net/netfilter/nf_nat_core.c
@@ -0,0 +1,1184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/gfp.h>
+#include <net/xfrm.h>
+#include <linux/siphash.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <uapi/linux/netfilter/nf_nat.h>
+
+#include "nf_internals.h"
+
+static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
+
+static DEFINE_MUTEX(nf_nat_proto_mutex);
+static unsigned int nat_net_id __read_mostly;
+
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static siphash_aligned_key_t nf_nat_hash_rnd;
+
+struct nf_nat_lookup_hook_priv {
+ struct nf_hook_entries __rcu *entries;
+
+ struct rcu_head rcu_head;
+};
+
+struct nf_nat_hooks_net {
+ struct nf_hook_ops *nat_hook_ops;
+ unsigned int users;
+};
+
+struct nat_net {
+ struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO];
+};
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (ct->status & statusbit) {
+ fl6->daddr = t->dst.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl6->saddr = t->src.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_sport = t->src.u.all;
+ }
+#endif
+}
+
+static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ unsigned long statusbit;
+ u8 family;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return;
+
+ family = nf_ct_l3num(ct);
+ dir = CTINFO2DIR(ctinfo);
+ if (dir == IP_CT_DIR_ORIGINAL)
+ statusbit = IPS_DST_NAT;
+ else
+ statusbit = IPS_SRC_NAT;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl);
+ return;
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl);
+ return;
+ }
+}
+#endif /* CONFIG_XFRM */
+
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ unsigned int hash;
+ struct {
+ struct nf_conntrack_man src;
+ u32 net_mix;
+ u32 protonum;
+ u32 zone;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+
+ memset(&combined, 0, sizeof(combined));
+
+ /* Original src, to ensure we map it consistently if poss. */
+ combined.src = tuple->src;
+ combined.net_mix = net_hash_mix(net);
+ combined.protonum = tuple->dst.protonum;
+
+ /* Zone ID can be used provided it's valid for both directions */
+ if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+ combined.zone = zone->id;
+
+ hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
+
+ return reciprocal_scale(hash, nf_nat_htable_size);
+}
+
+/* Is this tuple already taken? (not by us) */
+static int
+nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_conntrack)
+{
+ /* Conntrack tracking doesn't keep track of outgoing tuples; only
+ * incoming ones. NAT means they don't have a fixed mapping,
+ * so we invert the tuple and look for the incoming reply.
+ *
+ * We could keep a separate hash if this proves too slow.
+ */
+ struct nf_conntrack_tuple reply;
+
+ nf_ct_invert_tuple(&reply, tuple);
+ return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
+}
+
+static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range2 *range)
+{
+ if (t->src.l3num == NFPROTO_IPV4)
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+
+ return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+ ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+/* Is the manipulable part of the tuple between min and max inclusive? */
+static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ __be16 port;
+
+ switch (tuple->dst.protonum) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+ case IPPROTO_GRE: /* all fall through */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ port = tuple->src.u.all;
+ else
+ port = tuple->dst.u.all;
+
+ return ntohs(port) >= ntohs(min->all) &&
+ ntohs(port) <= ntohs(max->all);
+ default:
+ return true;
+ }
+}
+
+/* If we source map this tuple so the reply looks like reply_tuple, will
+ * that meet the constraints of range?
+ */
+static int in_range(const struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range)
+{
+ /* If we are supposed to map IPs, then we must be in the
+ * range specified, otherwise let this drag us onto a new src IP.
+ */
+ if (range->flags & NF_NAT_RANGE_MAP_IPS &&
+ !nf_nat_inet_in_range(tuple, range))
+ return 0;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+ return 1;
+
+ return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
+ &range->min_proto, &range->max_proto);
+}
+
+static inline int
+same_src(const struct nf_conn *ct,
+ const struct nf_conntrack_tuple *tuple)
+{
+ const struct nf_conntrack_tuple *t;
+
+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ return (t->dst.protonum == tuple->dst.protonum &&
+ nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
+ t->src.u.all == tuple->src.u.all);
+}
+
+/* Only called for SRC manip */
+static int
+find_appropriate_src(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *result,
+ const struct nf_nat_range2 *range)
+{
+ unsigned int h = hash_by_src(net, zone, tuple);
+ const struct nf_conn *ct;
+
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuple(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(result, range))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+ * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
+ * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ * 1-65535, we don't do pro-rata allocation based on ports; we choose
+ * the ip with the lowest src-ip/dst-ip/proto usage.
+ */
+static void
+find_best_ips_proto(const struct nf_conntrack_zone *zone,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ const struct nf_conn *ct,
+ enum nf_nat_manip_type maniptype)
+{
+ union nf_inet_addr *var_ipp;
+ unsigned int i, max;
+ /* Host order */
+ u32 minip, maxip, j, dist;
+ bool full_range;
+
+ /* No IP mapping? Do nothing. */
+ if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
+ return;
+
+ if (maniptype == NF_NAT_MANIP_SRC)
+ var_ipp = &tuple->src.u3;
+ else
+ var_ipp = &tuple->dst.u3;
+
+ /* Fast path: only one choice. */
+ if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
+ *var_ipp = range->min_addr;
+ return;
+ }
+
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
+ else
+ max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;
+
+ /* Hashing source and destination IPs gives a fairly even
+ * spread in practice (if there are a small number of IPs
+ * involved, there usually aren't that many connections
+ * anyway). The consistency means that servers see the same
+ * client coming from the same IP (some Internet Banking sites
+ * like this), even across reboots.
+ */
+ j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
+ range->flags & NF_NAT_RANGE_PERSISTENT ?
+ 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
+
+ full_range = false;
+ for (i = 0; i <= max; i++) {
+ /* If first bytes of the address are at the maximum, use the
+ * distance. Otherwise use the full range.
+ */
+ if (!full_range) {
+ minip = ntohl((__force __be32)range->min_addr.all[i]);
+ maxip = ntohl((__force __be32)range->max_addr.all[i]);
+ dist = maxip - minip + 1;
+ } else {
+ minip = 0;
+ dist = ~0;
+ }
+
+ var_ipp->all[i] = (__force __u32)
+ htonl(minip + reciprocal_scale(j, dist));
+ if (var_ipp->all[i] != range->max_addr.all[i])
+ full_range = true;
+
+ if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
+ j ^= (__force u32)tuple->dst.u3.all[i];
+ }
+}
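+
+/* Worked example of the selection above (illustrative numbers): with an
+ * SNAT range of 10.0.0.1-10.0.0.14 and NF_NAT_RANGE_PERSISTENT unset,
+ * dist is 14 and the jhash of the source (and destination) address is
+ * scaled into it, so a given client/server pair always maps onto the
+ * same one of the 14 addresses, while different pairs spread across
+ * the range.
+ */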
+
+/* Alter the per-proto part of the tuple (depending on maniptype), to
+ * give a unique tuple in the given range if possible.
+ *
+ * Per-protocol part of tuple is initialized to the incoming packet.
+ */
+static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ unsigned int range_size, min, max, i, attempts;
+ __be16 *keyptr;
+ u16 off;
+ static const unsigned int max_attempts = 128;
+
+ switch (tuple->dst.protonum) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ /* id is same for either direction... */
+ keyptr = &tuple->src.u.icmp.id;
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ min = 0;
+ range_size = 65536;
+ } else {
+ min = ntohs(range->min_proto.icmp.id);
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ }
+ goto find_free_id;
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+ case IPPROTO_GRE:
+ /* If there is no master conntrack, this is not PPTP;
+ * do not change tuples.
+ */
+ if (!ct->master)
+ return;
+
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.gre.key;
+ else
+ keyptr = &tuple->dst.u.gre.key;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ min = 1;
+ range_size = 65535;
+ } else {
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
+ }
+ goto find_free_id;
+#endif
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.all;
+ else
+ keyptr = &tuple->dst.u.all;
+
+ break;
+ default:
+ return;
+ }
+
+ /* If no range specified... */
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == NF_NAT_MANIP_DST)
+ return;
+
+ if (ntohs(*keyptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing */
+ if (ntohs(*keyptr) < 512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min_proto.all);
+ max = ntohs(range->max_proto.all);
+ if (unlikely(max < min))
+ swap(max, min);
+ range_size = max - min + 1;
+ }
+
+find_free_id:
+ if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
+ off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
+ else
+ off = get_random_u16();
+
+ attempts = range_size;
+ if (attempts > max_attempts)
+ attempts = max_attempts;
+
+ /* We are in softirq; doing a search of the entire range risks
+ * soft lockup when all tuples are already used.
+ *
+ * If we can't find any free port from the first offset, pick a new
+ * one and try again, with an ever smaller search window.
+ */
+another_round:
+ for (i = 0; i < attempts; i++, off++) {
+ *keyptr = htons(min + off % range_size);
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+
+ if (attempts >= range_size || attempts < 16)
+ return;
+ attempts /= 2;
+ off = get_random_u16();
+ goto another_round;
+}
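+
+/* Worked example of the retry logic above (illustrative): for an
+ * unconstrained SNAT of a high port, min is 1024 and range_size is
+ * 64512, so the first pass probes 128 ports from a random offset; each
+ * failed pass halves the window (64, 32, 16, 8 probes) and picks a
+ * fresh random offset before giving up, for at most 248 probes total.
+ */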
+
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
+ * and NF_INET_LOCAL_OUT, we change the destination to map into the
+ * range. It might not be possible to get a unique tuple, but we try.
+ * At worst (or if we race), we will end up with a final duplicate in
+ * __nf_conntrack_confirm and drop the packet. */
+static void
+get_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig_tuple,
+ const struct nf_nat_range2 *range,
+ struct nf_conn *ct,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct nf_conntrack_zone *zone;
+ struct net *net = nf_ct_net(ct);
+
+ zone = nf_ct_zone(ct);
+
+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
+ * and that same mapping gives a unique tuple within the given
+ * range, use that.
+ *
+ * This is only required for source (ie. NAT/masq) mappings.
+ * So far, we don't do local source mappings, so multiple
+ * manips are not an issue.
+ */
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
+ /* try the original tuple first */
+ if (in_range(orig_tuple, range)) {
+ if (!nf_nat_used_tuple(orig_tuple, ct)) {
+ *tuple = *orig_tuple;
+ return;
+ }
+ } else if (find_appropriate_src(net, zone,
+ orig_tuple, tuple, range)) {
+ pr_debug("get_unique_tuple: Found current src map\n");
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ }
+
+ /* 2) Select the least-used IP/proto combination in the given range */
+ *tuple = *orig_tuple;
+ find_best_ips_proto(zone, tuple, range, ct, maniptype);
+
+ /* 3) The per-protocol part of the manip is made to map into
+ * the range to make a unique tuple.
+ */
+
+ /* Only bother mapping if it's not already in range and unique */
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
+ if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
+ l4proto_in_range(tuple, maniptype,
+ &range->min_proto,
+ &range->max_proto) &&
+ (range->min_proto.all == range->max_proto.all ||
+ !nf_nat_used_tuple(tuple, ct)))
+ return;
+ } else if (!nf_nat_used_tuple(tuple, ct)) {
+ return;
+ }
+ }
+
+ /* Last chance: get protocol to try to obtain unique tuple. */
+ nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
+}
+
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (nat)
+ return nat;
+
+ if (!nf_ct_is_confirmed(ct))
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+
+ return nat;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
+unsigned int
+nf_nat_setup_info(struct nf_conn *ct,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_tuple curr_tuple, new_tuple;
+
+ /* Can't setup nat info for confirmed ct. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+
+ WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+ maniptype != NF_NAT_MANIP_DST);
+
+ if (WARN_ON(nf_nat_initialized(ct, maniptype)))
+ return NF_DROP;
+
+ /* What we've got will look like the inverse of the reply. Normally
+ * this is what is in the conntrack, except for prior
+ * manipulations (future optimization: if num_manips == 0,
+ * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+ */
+ nf_ct_invert_tuple(&curr_tuple,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
+
+ if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+ struct nf_conntrack_tuple reply;
+
+ /* Alter conntrack table so will recognize replies. */
+ nf_ct_invert_tuple(&reply, &new_tuple);
+ nf_conntrack_alter_reply(ct, &reply);
+
+ /* Non-atomic: we own this at the moment. */
+ if (maniptype == NF_NAT_MANIP_SRC)
+ ct->status |= IPS_SRC_NAT;
+ else
+ ct->status |= IPS_DST_NAT;
+
+ if (nfct_help(ct) && !nfct_seqadj(ct))
+ if (!nfct_seqadj_ext_add(ct))
+ return NF_DROP;
+ }
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ unsigned int srchash;
+ spinlock_t *lock;
+
+ srchash = hash_by_src(net, nf_ct_zone(ct),
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
+ spin_lock_bh(lock);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(lock);
+ }
+
+ /* It's done. */
+ if (maniptype == NF_NAT_MANIP_DST)
+ ct->status |= IPS_DST_NAT_DONE;
+ else
+ ct->status |= IPS_SRC_NAT_DONE;
+
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL(nf_nat_setup_info);
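+
+/* Illustrative caller sketch (not part of this file): a DNAT of a new,
+ * unconfirmed conntrack to the single address 10.0.0.1, port 8080, the
+ * way a NAT rule backend would request it. The values are made up.
+ *
+ *   struct nf_nat_range2 range;
+ *
+ *   memset(&range, 0, sizeof(range));
+ *   range.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
+ *   range.min_addr.ip = range.max_addr.ip = htonl(0x0a000001);
+ *   range.min_proto.all = range.max_proto.all = htons(8080);
+ *   verdict = nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+ */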
+
+static unsigned int
+__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ * Use reply in case it's already been mangled (eg local packet).
+ */
+ union nf_inet_addr ip =
+ (manip == NF_NAT_MANIP_SRC ?
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+ struct nf_nat_range2 range = {
+ .flags = NF_NAT_RANGE_MAP_IPS,
+ .min_addr = ip,
+ .max_addr = ip,
+ };
+ return nf_nat_setup_info(ct, &range, manip);
+}
+
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
+}
+EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
+
+/* Do packet manipulations according to nf_nat_setup_info. */
+unsigned int nf_nat_packet(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int verdict = NF_ACCEPT;
+ unsigned long statusbit;
+
+ if (mtype == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ /* Non-atomic: these bits don't change. */
+ if (ct->status & statusbit)
+ verdict = nf_nat_manip_pkt(skb, ct, mtype, dir);
+
+ return verdict;
+}
+EXPORT_SYMBOL_GPL(nf_nat_packet);
+
+static bool in_vrf_postrouting(const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (state->hook == NF_INET_POST_ROUTING &&
+ netif_is_l3_master(state->out))
+ return true;
+#endif
+ return false;
+}
+
+unsigned int
+nf_nat_inet_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibility to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct || in_vrf_postrouting(state))
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ /* Only ICMPs can be IP_CT_IS_REPLY. Fallthrough */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ struct nf_nat_lookup_hook_priv *lpriv = priv;
+ struct nf_hook_entries *e = rcu_dereference(lpriv->entries);
+ unsigned int ret;
+ int i;
+
+ if (!e)
+ goto null_bind;
+
+ for (i = 0; i < e->num_hook_entries; i++) {
+ ret = e->hooks[i].hook(e->hooks[i].priv, skb,
+ state);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (nf_nat_initialized(ct, maniptype))
+ goto do_nat;
+ }
+null_bind:
+ ret = nf_nat_alloc_null_binding(ct, state->hook);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p (status bits 0x%lx)\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct, ct->status);
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat,
+ state->out))
+ goto oif_changed;
+ }
+ break;
+ default:
+ /* ESTABLISHED */
+ WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
+ goto oif_changed;
+ }
+do_nat:
+ return nf_nat_packet(ct, ctinfo, state->hook, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_fn);
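+
+/* The per-family netfilter hooks are thin wrappers around this
+ * function: the IPv4/IPv6 hook implementations (see nf_nat_proto.c)
+ * special-case related ICMP/ICMPv6 errors via reply translation and
+ * defer everything else to nf_nat_inet_fn().
+ */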
+
+struct nf_nat_proto_clean {
+ u8 l3proto;
+ u8 l4proto;
+};
+
+/* kill conntracks with affected NAT section */
+static int nf_nat_proto_remove(struct nf_conn *i, void *data)
+{
+ const struct nf_nat_proto_clean *clean = data;
+
+ if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
+ (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
+ return 0;
+
+ return i->status & IPS_NAT_MASK ? 1 : 0;
+}
+
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+ unsigned int h;
+
+ h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+}
+
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+ if (nf_nat_proto_remove(ct, data))
+ return 1;
+
+ /* This module is being removed and the conntrack has a NAT null binding.
+ * Remove it from the bysource hash, as the table will be freed soon.
+ *
+ * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
+ * will delete entry from already-freed table.
+ */
+ if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
+ nf_nat_cleanup_conntrack(ct);
+
+ /* don't delete conntrack. Although that would make things a lot
+ * simpler, we'd end up flushing all conntracks on nat rmmod.
+ */
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+ [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
+ [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
+};
+
+static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_PROTONAT_PORT_MIN]) {
+ range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+ range->max_proto.all = range->min_proto.all;
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ if (tb[CTA_PROTONAT_PORT_MAX]) {
+ range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ return 0;
+}
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+ const struct nf_conn *ct,
+ struct nf_nat_range2 *range)
+{
+ struct nlattr *tb[CTA_PROTONAT_MAX+1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, CTA_PROTONAT_MAX, attr,
+ protonat_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ return nf_nat_l4proto_nlattr_to_range(tb, range);
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+ [CTA_NAT_V4_MINIP] = { .type = NLA_U32 },
+ [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 },
+ [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) },
+ [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) },
+ [CTA_NAT_PROTO] = { .type = NLA_NESTED },
+};
+
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_NAT_V6_MINIP]) {
+ nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+ sizeof(struct in6_addr));
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V6_MAXIP])
+ nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+ sizeof(struct in6_addr));
+ else
+ range->max_addr = range->min_addr;
+
+ return 0;
+}
+
+static int
+nfnetlink_parse_nat(const struct nlattr *nat,
+ const struct nf_conn *ct, struct nf_nat_range2 *range)
+{
+ struct nlattr *tb[CTA_NAT_MAX+1];
+ int err;
+
+ memset(range, 0, sizeof(*range));
+
+ err = nla_parse_nested_deprecated(tb, CTA_NAT_MAX, nat,
+ nat_nla_policy, NULL);
+ if (err < 0)
+ return err;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ err = nf_nat_ipv4_nlattr_to_range(tb, range);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_nat_ipv6_nlattr_to_range(tb, range);
+ break;
+ default:
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+
+ if (err)
+ return err;
+
+ if (!tb[CTA_NAT_PROTO])
+ return 0;
+
+ return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+}
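+
+/* Illustrative CTA_NAT payload as parsed above (values made up):
+ *
+ *   CTA_NAT_V4_MINIP = 10.0.0.1
+ *   CTA_NAT_V4_MAXIP = 10.0.0.8
+ *   CTA_NAT_PROTO
+ *     CTA_PROTONAT_PORT_MIN = 4000
+ *     CTA_PROTONAT_PORT_MAX = 4099
+ *
+ * yields a range of 10.0.0.1-10.0.0.8, ports 4000-4099, with
+ * NF_NAT_RANGE_MAP_IPS and NF_NAT_RANGE_PROTO_SPECIFIED set.
+ */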
+
+/* This function is called under rcu_read_lock() */
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ const struct nlattr *attr)
+{
+ struct nf_nat_range2 range;
+ int err;
+
+ /* Should not happen, restricted to creating new conntracks
+ * via ctnetlink.
+ */
+ if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
+ return -EEXIST;
+
+ /* No NAT information has been passed, allocate the null-binding */
+ if (attr == NULL)
+ return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;
+
+ err = nfnetlink_parse_nat(attr, ct, &range);
+ if (err < 0)
+ return err;
+
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ const struct nlattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+static struct nf_ct_helper_expectfn follow_master_nat = {
+ .name = "nat-follow-master",
+ .expectfn = nf_nat_follow_master,
+};
+
+int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
+ const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count)
+{
+ struct nat_net *nat_net = net_generic(net, nat_net_id);
+ struct nf_nat_hooks_net *nat_proto_net;
+ struct nf_nat_lookup_hook_priv *priv;
+ unsigned int hooknum = ops->hooknum;
+ struct nf_hook_ops *nat_ops;
+ int i, ret;
+
+ if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
+ return -EINVAL;
+
+ nat_proto_net = &nat_net->nat_proto_net[pf];
+
+ for (i = 0; i < ops_count; i++) {
+ if (orig_nat_ops[i].hooknum == hooknum) {
+ hooknum = i;
+ break;
+ }
+ }
+
+ if (WARN_ON_ONCE(i == ops_count))
+ return -EINVAL;
+
+ mutex_lock(&nf_nat_proto_mutex);
+ if (!nat_proto_net->nat_hook_ops) {
+ WARN_ON(nat_proto_net->users != 0);
+
+ nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL);
+ if (!nat_ops) {
+ mutex_unlock(&nf_nat_proto_mutex);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ops_count; i++) {
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv) {
+ nat_ops[i].priv = priv;
+ continue;
+ }
+ mutex_unlock(&nf_nat_proto_mutex);
+ while (i)
+ kfree(nat_ops[--i].priv);
+ kfree(nat_ops);
+ return -ENOMEM;
+ }
+
+ ret = nf_register_net_hooks(net, nat_ops, ops_count);
+ if (ret < 0) {
+ mutex_unlock(&nf_nat_proto_mutex);
+ for (i = 0; i < ops_count; i++)
+ kfree(nat_ops[i].priv);
+ kfree(nat_ops);
+ return ret;
+ }
+
+ nat_proto_net->nat_hook_ops = nat_ops;
+ }
+
+ nat_ops = nat_proto_net->nat_hook_ops;
+ priv = nat_ops[hooknum].priv;
+ if (WARN_ON_ONCE(!priv)) {
+ mutex_unlock(&nf_nat_proto_mutex);
+ return -EOPNOTSUPP;
+ }
+
+ ret = nf_hook_entries_insert_raw(&priv->entries, ops);
+ if (ret == 0)
+ nat_proto_net->users++;
+
+ mutex_unlock(&nf_nat_proto_mutex);
+ return ret;
+}
+
+void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
+ unsigned int ops_count)
+{
+ struct nat_net *nat_net = net_generic(net, nat_net_id);
+ struct nf_nat_hooks_net *nat_proto_net;
+ struct nf_nat_lookup_hook_priv *priv;
+ struct nf_hook_ops *nat_ops;
+ int hooknum = ops->hooknum;
+ int i;
+
+ if (pf >= ARRAY_SIZE(nat_net->nat_proto_net))
+ return;
+
+ nat_proto_net = &nat_net->nat_proto_net[pf];
+
+ mutex_lock(&nf_nat_proto_mutex);
+ if (WARN_ON(nat_proto_net->users == 0))
+ goto unlock;
+
+ nat_proto_net->users--;
+
+ nat_ops = nat_proto_net->nat_hook_ops;
+ for (i = 0; i < ops_count; i++) {
+ if (nat_ops[i].hooknum == hooknum) {
+ hooknum = i;
+ break;
+ }
+ }
+ if (WARN_ON_ONCE(i == ops_count))
+ goto unlock;
+ priv = nat_ops[hooknum].priv;
+ nf_hook_entries_delete_raw(&priv->entries, ops);
+
+ if (nat_proto_net->users == 0) {
+ nf_unregister_net_hooks(net, nat_ops, ops_count);
+
+ for (i = 0; i < ops_count; i++) {
+ priv = nat_ops[i].priv;
+ kfree_rcu(priv, rcu_head);
+ }
+
+ nat_proto_net->nat_hook_ops = NULL;
+ kfree(nat_ops);
+ }
+unlock:
+ mutex_unlock(&nf_nat_proto_mutex);
+}
+
+static struct pernet_operations nat_net_ops = {
+ .id = &nat_net_id,
+ .size = sizeof(struct nat_net),
+};
+
+static const struct nf_nat_hook nat_hook = {
+ .parse_nat_setup = nfnetlink_parse_nat_setup,
+#ifdef CONFIG_XFRM
+ .decode_session = __nf_nat_decode_session,
+#endif
+ .manip_pkt = nf_nat_manip_pkt,
+ .remove_nat_bysrc = nf_nat_cleanup_conntrack,
+};
+
+static int __init nf_nat_init(void)
+{
+ int ret, i;
+
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+ if (nf_nat_htable_size < CONNTRACK_LOCKS)
+ nf_nat_htable_size = CONNTRACK_LOCKS;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_nat_locks[i]);
+
+ ret = register_pernet_subsys(&nat_net_ops);
+ if (ret < 0) {
+ kvfree(nf_nat_bysource);
+ return ret;
+ }
+
+ nf_ct_helper_expectfn_register(&follow_master_nat);
+
+ WARN_ON(nf_nat_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_hook, &nat_hook);
+
+ ret = register_nf_nat_bpf();
+ if (ret < 0) {
+ RCU_INIT_POINTER(nf_nat_hook, NULL);
+ nf_ct_helper_expectfn_unregister(&follow_master_nat);
+ synchronize_net();
+ unregister_pernet_subsys(&nat_net_ops);
+ kvfree(nf_nat_bysource);
+ }
+
+ return ret;
+}
+
+static void __exit nf_nat_cleanup(void)
+{
+ struct nf_nat_proto_clean clean = {};
+
+ nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
+
+ nf_ct_helper_expectfn_unregister(&follow_master_nat);
+ RCU_INIT_POINTER(nf_nat_hook, NULL);
+
+ synchronize_net();
+ kvfree(nf_nat_bysource);
+ unregister_pernet_subsys(&nat_net_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(nf_nat_init);
+module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
new file mode 100644
index 000000000..c92a436d9
--- /dev/null
+++ b/net/netfilter/nf_nat_ftp.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* FTP extension for TCP NAT alteration. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+#define NAT_HELPER_NAME "ftp"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp NAT helper");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+/* FIXME: Time out? --RR */
+
+static struct nf_conntrack_nat_helper nat_helper_ftp =
+ NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
+static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type,
+ char *buffer, size_t buflen,
+ union nf_inet_addr *addr, u16 port)
+{
+ switch (type) {
+ case NF_CT_FTP_PORT:
+ case NF_CT_FTP_PASV:
+ return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u",
+ ((unsigned char *)&addr->ip)[0],
+ ((unsigned char *)&addr->ip)[1],
+ ((unsigned char *)&addr->ip)[2],
+ ((unsigned char *)&addr->ip)[3],
+ port >> 8,
+ port & 0xFF);
+ case NF_CT_FTP_EPRT:
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return snprintf(buffer, buflen, "|1|%pI4|%u|",
+ &addr->ip, port);
+ else
+ return snprintf(buffer, buflen, "|2|%pI6|%u|",
+ &addr->ip6, port);
+ case NF_CT_FTP_EPSV:
+ return snprintf(buffer, buflen, "|||%u|", port);
+ }
+
+ return 0;
+}
+
+/* So, this packet has hit the connection tracking matching code.
+ * Mangle it, and change the expectation to match the new version.
+ */
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ enum nf_ct_ftp_type type,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+{
+ union nf_inet_addr newaddr;
+ u_int16_t port;
+ int dir = CTINFO2DIR(ctinfo);
+ struct nf_conn *ct = exp->master;
+ char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
+ unsigned int buflen;
+
+ pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
+
+ /* Connection will come from wherever this packet goes, hence !dir */
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = !dir;
+
+ /* When you see the packet, we need to NAT it the same as
+ * this one. */
+ exp->expectfn = nf_nat_follow_master;
+
+ port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port));
+ if (port == 0) {
+ nf_ct_helper_log(skb, exp->master, "all ports in use");
+ return NF_DROP;
+ }
+
+ buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer),
+ &newaddr, port);
+ if (!buflen)
+ goto out;
+
+ pr_debug("calling nf_nat_mangle_tcp_packet\n");
+
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
+ matchlen, buffer, buflen))
+ goto out;
+
+ return NF_ACCEPT;
+
+out:
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ nf_ct_unexpect_related(exp);
+ return NF_DROP;
+}
+
+static void __exit nf_nat_ftp_fini(void)
+{
+ nf_nat_helper_unregister(&nat_helper_ftp);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, NULL);
+ synchronize_rcu();
+}
+
+static int __init nf_nat_ftp_init(void)
+{
+ BUG_ON(nf_nat_ftp_hook != NULL);
+ nf_nat_helper_register(&nat_helper_ftp);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp);
+ return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+static int warn_set(const char *val, const struct kernel_param *kp)
+{
+ pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(nf_nat_ftp_init);
+module_exit(nf_nat_ftp_fini);
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
new file mode 100644
index 000000000..a95a25196
--- /dev/null
+++ b/net/netfilter/nf_nat_helper.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* nf_nat_helper.c - generic support functions for NAT helpers
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+/* Frobs data inside this packet, which is linear. */
+static void mangle_contents(struct sk_buff *skb,
+ unsigned int dataoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ unsigned char *data;
+
+ SKB_LINEAR_ASSERT(skb);
+ data = skb_network_header(skb) + dataoff;
+
+ /* move post-replacement */
+ memmove(data + match_offset + rep_len,
+ data + match_offset + match_len,
+ skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff +
+ match_offset + match_len));
+
+ /* insert data from buffer */
+ memcpy(data + match_offset, rep_buffer, rep_len);
+
+ /* update skb info */
+ if (rep_len > match_len) {
+ pr_debug("nf_nat_mangle_packet: Extending packet by "
+ "%u from %u bytes\n", rep_len - match_len, skb->len);
+ skb_put(skb, rep_len - match_len);
+ } else {
+ pr_debug("nf_nat_mangle_packet: Shrinking packet from "
+ "%u from %u bytes\n", match_len - rep_len, skb->len);
+ __skb_trim(skb, skb->len + rep_len - match_len);
+ }
+
+ if (nf_ct_l3num((struct nf_conn *)skb_nfct(skb)) == NFPROTO_IPV4) {
+ /* fix IP hdr checksum information */
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
+ } else {
+ ipv6_hdr(skb)->payload_len =
+ htons(skb->len - sizeof(struct ipv6hdr));
+ }
+}
+
+/* Unusual, but possible case. */
+static bool enlarge_skb(struct sk_buff *skb, unsigned int extra)
+{
+ if (skb->len + extra > 65535)
+ return false;
+
+ if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
+ return false;
+
+ return true;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care of all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ */
+bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len, bool adjust)
+{
+ struct tcphdr *tcph;
+ int oldlen, datalen;
+
+ if (skb_ensure_writable(skb, skb->len))
+ return false;
+
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
+ return false;
+
+ tcph = (void *)skb->data + protoff;
+
+ oldlen = skb->len - protoff;
+ mangle_contents(skb, protoff + tcph->doff*4,
+ match_offset, match_len, rep_buffer, rep_len);
+
+ datalen = skb->len - protoff;
+
+ nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP,
+ tcph, &tcph->check, datalen, oldlen);
+
+ if (adjust && rep_len != match_len)
+ nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
+ (int)rep_len - (int)match_len);
+
+ return true;
+}
+EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
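+
+/* Note: most helpers use the nf_nat_mangle_tcp_packet() wrapper
+ * (declared in nf_nat_helper.h), which passes adjust == true so the
+ * seqadj extension compensates for any payload length change; only
+ * callers doing their own sequence bookkeeping pass adjust == false.
+ */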
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care of all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
+ * should be fairly easy to do.
+ */
+bool
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct udphdr *udph;
+ int datalen, oldlen;
+
+ if (skb_ensure_writable(skb, skb->len))
+ return false;
+
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
+ return false;
+
+ udph = (void *)skb->data + protoff;
+
+ oldlen = skb->len - protoff;
+ mangle_contents(skb, protoff + sizeof(*udph),
+ match_offset, match_len, rep_buffer, rep_len);
+
+ /* update the length of the UDP packet */
+ datalen = skb->len - protoff;
+ udph->len = htons(datalen);
+
+ /* fix udp checksum if udp checksum was previously calculated */
+ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
+ return true;
+
+ nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP,
+ udph, &udph->check, datalen, oldlen);
+
+ return true;
+}
+EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
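+
+/* Illustrative usage (names made up): rewriting a decimal port string
+ * located at matchoff/matchlen in a UDP payload, as the Amanda helper
+ * above does:
+ *
+ *   char buf[sizeof("65535")];
+ *
+ *   snprintf(buf, sizeof(buf), "%u", new_port);
+ *   if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff,
+ *                                 matchoff, matchlen, buf, strlen(buf)))
+ *       return NF_DROP;
+ */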
+
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+void nf_nat_follow_master(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_nat_range2 range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = exp->saved_proto;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+EXPORT_SYMBOL(nf_nat_follow_master);
+
+u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port)
+{
+ static const unsigned int max_attempts = 128;
+ int range, attempts_left;
+ u16 min = port;
+
+ range = USHRT_MAX - port;
+ attempts_left = range;
+
+ if (attempts_left > max_attempts)
+ attempts_left = max_attempts;
+
+ /* Try to get same port: if not, try to change it. */
+ for (;;) {
+ int res;
+
+ exp->tuple.dst.u.tcp.port = htons(port);
+ res = nf_ct_expect_related(exp, 0);
+ if (res == 0)
+ return port;
+
+ if (res != -EBUSY || (--attempts_left < 0))
+ break;
+
+ port = min + prandom_u32_max(range);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_exp_find_port);
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
new file mode 100644
index 000000000..19c4fcc60
--- /dev/null
+++ b/net/netfilter/nf_nat_irc.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* IRC extension for TCP NAT alteration.
+ *
+ * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * based on a copy of RR's ip_nat_ftp.c
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/tcp.h>
+#include <linux/kernel.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+#define NAT_HELPER_NAME "irc"
+
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_irc =
+ NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
+static unsigned int help(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+{
+ char buffer[sizeof("4294967296 65635")];
+ struct nf_conn *ct = exp->master;
+ union nf_inet_addr newaddr;
+ u_int16_t port;
+
+ /* Reply comes from server. */
+ newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3;
+
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+ exp->expectfn = nf_nat_follow_master;
+
+ port = nf_nat_exp_find_port(exp,
+ ntohs(exp->saved_proto.tcp.port));
+ if (port == 0) {
+ nf_ct_helper_log(skb, ct, "all ports in use");
+ return NF_DROP;
+ }
+
+ /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
+ * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
+ * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
+ *
+ * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
+ * 255.255.255.255==4294967295, 10 digits)
+ * P: bound port (min 1 d, max 5 d (65535))
+ * F: filename (min 1 d )
+ * S: size (min 1 d )
+ * 0x01, \n: terminators
+ */
+ /* AAA = "us", ie. where server normally talks to. */
+ snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port);
+ pr_debug("inserting '%s' == %pI4, port %u\n",
+ buffer, &newaddr.ip, port);
+
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
+ matchlen, buffer, strlen(buffer))) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ nf_ct_unexpect_related(exp);
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+static void __exit nf_nat_irc_fini(void)
+{
+ nf_nat_helper_unregister(&nat_helper_irc);
+ RCU_INIT_POINTER(nf_nat_irc_hook, NULL);
+ synchronize_rcu();
+}
+
+static int __init nf_nat_irc_init(void)
+{
+ BUG_ON(nf_nat_irc_hook != NULL);
+ nf_nat_helper_register(&nat_helper_irc);
+ RCU_INIT_POINTER(nf_nat_irc_hook, help);
+ return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
+static int warn_set(const char *val, const struct kernel_param *kp)
+{
+ pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(nf_nat_irc_init);
+module_exit(nf_nat_irc_fini);
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
new file mode 100644
index 000000000..1a506b0c6
--- /dev/null
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/nf_nat_masquerade.h>
+
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ netns_tracker ns_tracker;
+ union nf_inet_addr addr;
+ int ifindex;
+ int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT 16
+
+static DEFINE_MUTEX(masq_mutex);
+static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range2 *range,
+ const struct net_device *out)
+{
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
+ enum ip_conntrack_info ctinfo;
+ struct nf_nat_range2 newrange;
+ const struct rtable *rt;
+ __be32 newsrc, nh;
+
+ WARN_ON(hooknum != NF_INET_POST_ROUTING);
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+ return NF_ACCEPT;
+
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ pr_info("%s ate my IP address\n", out->name);
+ return NF_DROP;
+ }
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
+
+ /* Transfer from original range. */
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
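+
+/* Illustrative caller sketch (close to the xt MASQUERADE target): the
+ * IPv4 target hands its configured proto range to this function from
+ * the POST_ROUTING hook, roughly:
+ *
+ *   static unsigned int
+ *   masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+ *   {
+ *       const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ *       struct nf_nat_range2 range = {
+ *           .flags = mr->range[0].flags,
+ *           .min_proto = mr->range[0].min,
+ *           .max_proto = mr->range[0].max,
+ *       };
+ *
+ *       return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+ *                                     xt_out(par));
+ *   }
+ */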
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct nf_ct_iter_data iter_data = {};
+ struct masq_dev_work *w;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ iter_data.net = w->net;
+ iter_data.data = (void *)w;
+ nf_ct_iterate_cleanup_net(w->iter, &iter_data);
+
+ put_net_track(w->net, &w->ns_tracker);
+ kfree(w);
+ atomic_dec(&masq_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+ int ifindex,
+ int (*iter)(struct nf_conn *i, void *data),
+ gfp_t gfp_flags)
+{
+ struct masq_dev_work *w;
+
+ if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+ return;
+
+ net = maybe_get_net(net);
+ if (!net)
+ return;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kzalloc(sizeof(*w), gfp_flags);
+ if (w) {
+ /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+ atomic_inc(&masq_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = ifindex;
+ w->net = net;
+ netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
+ w->iter = iter;
+ if (addr)
+ w->addr = *addr;
+ schedule_work(&w->work);
+ return;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
+{
+ const struct nf_conn_nat *nat = nfct_nat(i);
+ const struct masq_dev_work *w = arg;
+
+ if (!nat)
+ return 0;
+ return nat->masq_index == w->ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ * conntracks which were associated with that device,
+ * and forget them.
+ */
+
+ nf_nat_masq_schedule(net, NULL, dev->ifindex,
+ device_cmp, GFP_KERNEL);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+ struct nf_conntrack_tuple *tuple;
+ struct masq_dev_work *w = ptr;
+
+ if (!device_cmp(ct, ptr))
+ return 0;
+
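+ /* The removed address appears as the destination of the reply
+ * tuple, since masquerading rewrote the original source.
+ */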
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ const struct in_ifaddr *ifa = ptr;
+ const struct in_device *idev;
+ const struct net_device *dev;
+ union nf_inet_addr addr;
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ /* The masq_dev_notifier will catch the case of the device going
+ * down. So if the inetdev is dead and being destroyed we have
+ * no work to do. Otherwise this is an individual address removal
+ * and we have to perform the flush.
+ */
+ idev = ifa->ifa_dev;
+ if (idev->dead)
+ return NOTIFY_DONE;
+
+ memset(&addr, 0, sizeof(addr));
+
+ addr.ip = ifa->ifa_address;
+
+ dev = idev->dev;
+ nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+ inet_cmp, GFP_KERNEL);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+ const struct in6_addr *daddr, unsigned int srcprefs,
+ struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+ return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ const struct net_device *out)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range2 newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
+ *
+ * Defer it to the system workqueue.
+ *
+ * As we can have 'a lot' of inet events (depending on the number of IPv6
+ * addresses being deleted), we also need to limit the work item queue.
+ */
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ const struct net_device *dev;
+ union nf_inet_addr addr;
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ dev = ifa->idev->dev;
+
+ memset(&addr, 0, sizeof(addr));
+
+ addr.in6 = ifa->addr;
+
+ nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+ GFP_ATOMIC);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
+};
+
+static int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+ return register_inet6addr_notifier(&masq_inet6_notifier);
+}
+#else
+static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
+#endif
+
+int nf_nat_masquerade_inet_register_notifiers(void)
+{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
+ if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
+ ret = -EOVERFLOW;
+ goto out_unlock;
+ }
+
+ /* check if the notifier was already set */
+ if (++masq_refcnt > 1)
+ goto out_unlock;
+
+ /* Register for device down reports */
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
+ /* Register IP address change reports */
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ goto err_unreg_inet;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+err_unreg_inet:
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
+
+void nf_nat_masquerade_inet_unregister_notifiers(void)
+{
+ mutex_lock(&masq_mutex);
+ /* check if the notifiers still have clients */
+ if (--masq_refcnt > 0)
+ goto out_unlock;
+
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
+#endif
+out_unlock:
+ mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
new file mode 100644
index 000000000..48cc60084
--- /dev/null
+++ b/net/netfilter/nf_nat_proto.c
@@ -0,0 +1,1103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <net/sctp/checksum.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static void nf_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype);
+
+static void
+__udp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, struct udphdr *hdr,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype, bool do_csum)
+{
+ __be16 *portptr, newport;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ newport = tuple->src.u.udp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst port */
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+ if (do_csum) {
+ nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+ false);
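+ /* A result of zero would mean "no checksum"; RFC 768 mandates
+ * sending all ones (CSUM_MANGLED_0) instead.
+ */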
+ if (!hdr->check)
+ hdr->check = CSUM_MANGLED_0;
+ }
+ *portptr = newport;
+}
+
+static bool udp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct udphdr *hdr;
+
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct udphdr *)(skb->data + hdroff);
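+ /* A zero checksum means the sender did not compute one (legal for
+ * UDP over IPv4), so there is nothing to fix up.
+ */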
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
+
+ return true;
+}
+
+static bool udplite_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ struct udphdr *hdr;
+
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct udphdr *)(skb->data + hdroff);
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
+#endif
+ return true;
+}
+
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct sctphdr *hdr;
+ int hdrsize = 8;
+
+ /* This could be an inner header returned in an ICMP packet; in such
+ * cases we cannot update the checksum field since it is outside
+ * of the 8 bytes of transport layer headers we are guaranteed.
+ */
+ if (skb->len >= hdroff + sizeof(*hdr))
+ hdrsize = sizeof(*hdr);
+
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct sctphdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ hdr->source = tuple->src.u.sctp.port;
+ } else {
+ /* Get rid of dst port */
+ hdr->dest = tuple->dst.u.sctp.port;
+ }
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ hdr->checksum = sctp_compute_cksum(skb, hdroff);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+#endif
+ return true;
+}
+
+static bool
+tcp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct tcphdr *hdr;
+ __be16 *portptr, newport, oldport;
+ int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+ /* This could be an inner header returned in an ICMP packet; in such
+ * cases we cannot update the checksum field since it is outside of
+ * the 8 bytes of transport layer headers we are guaranteed.
+ */
+ if (skb->len >= hdroff + sizeof(struct tcphdr))
+ hdrsize = sizeof(struct tcphdr);
+
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct tcphdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ newport = tuple->src.u.tcp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst port */
+ newport = tuple->dst.u.tcp.port;
+ portptr = &hdr->dest;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
+ return true;
+}
+
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct dccp_hdr *hdr;
+ __be16 *portptr, oldport, newport;
+ int hdrsize = 8; /* DCCP connection tracking guarantees this much */
+
+ if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+ hdrsize = sizeof(struct dccp_hdr);
+
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ newport = tuple->src.u.dccp.port;
+ portptr = &hdr->dccph_sport;
+ } else {
+ newport = tuple->dst.u.dccp.port;
+ portptr = &hdr->dccph_dport;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
+ false);
+#endif
+ return true;
+}
+
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmphdr *hdr;
+
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmphdr *)(skb->data + hdroff);
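+ /* Only ICMP query/reply messages carry an identifier that NAT can
+ * rewrite like a port; all other types pass through unmodified.
+ */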
+ switch (hdr->type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMP_INFO_REQUEST:
+ case ICMP_INFO_REPLY:
+ case ICMP_ADDRESS:
+ case ICMP_ADDRESSREPLY:
+ break;
+ default:
+ return true;
+ }
+ inet_proto_csum_replace2(&hdr->checksum, skb,
+ hdr->un.echo.id, tuple->src.u.icmp.id, false);
+ hdr->un.echo.id = tuple->src.u.icmp.id;
+ return true;
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmp6hdr *hdr;
+
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmp6hdr *)(skb->data + hdroff);
+ nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+ inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+ hdr->icmp6_identifier,
+ tuple->src.u.icmp.id, false);
+ hdr->icmp6_identifier = tuple->src.u.icmp.id;
+ }
+ return true;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static bool
+gre_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+ const struct gre_base_hdr *greh;
+ struct pptp_gre_header *pgreh;
+
+ /* pgreh includes two optional 32-bit fields (sequence number and
+ * acknowledgment number) which are not required to be there; that is
+ * where the magic '8' comes from.
+ */
+ if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ return false;
+
+ greh = (void *)skb->data + hdroff;
+ pgreh = (struct pptp_gre_header *)greh;
+
+ /* We can only manipulate the destination of a packet, since the
+ * 'source key' is not present in the packet itself.
+ */
+ if (maniptype != NF_NAT_MANIP_DST)
+ return true;
+
+ switch (greh->flags & GRE_VERSION) {
+ case GRE_VERSION_0:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
+ break;
+ case GRE_VERSION_1:
+ pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
+ break;
+ default:
+ pr_debug("can't nat unknown GRE version\n");
+ return false;
+ }
+#endif
+ return true;
+}
+
+static bool l4proto_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ switch (tuple->dst.protonum) {
+ case IPPROTO_TCP:
+ return tcp_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_UDP:
+ return udp_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_UDPLITE:
+ return udplite_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_SCTP:
+ return sctp_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_ICMP:
+ return icmp_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_ICMPV6:
+ return icmpv6_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_DCCP:
+ return dccp_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_GRE:
+ return gre_manip_pkt(skb, iphdroff, hdroff,
+ tuple, maniptype);
+ }
+
+ /* If we don't know protocol -- no error, pass it unmodified. */
+ return true;
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+ return false;
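+ /* Reload the header pointer: skb_ensure_writable() in the l4proto
+ * handler may have reallocated skb->data.
+ */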
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *ipv6h;
+ __be16 frag_off;
+ int hdroff;
+ u8 nexthdr;
+
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
+ return false;
+
+ ipv6h = (void *)skb->data + iphdroff;
+ nexthdr = ipv6h->nexthdr;
+ hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+ &nexthdr, &frag_off);
+ if (hdroff < 0)
+ goto manip_addr;
+
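+ /* Mangle the transport header only when the fragment offset is
+ * zero, i.e. this fragment actually carries it.
+ */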
+ if ((frag_off & htons(~0x7)) == 0 &&
+ !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+ return false;
+
+ /* must reload, offset might have changed */
+ ipv6h = (void *)skb->data + iphdroff;
+
+manip_addr:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ ipv6h->saddr = target->src.u3.in6;
+ else
+ ipv6h->daddr = target->dst.u3.in6;
+
+#endif
+ return true;
+}
+
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+ enum nf_nat_manip_type mtype,
+ enum ip_conntrack_dir dir)
+{
+ struct nf_conntrack_tuple target;
+
+ /* We are aiming to look like the inverse of the other direction. */
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+
+ switch (target.src.l3num) {
+ case NFPROTO_IPV6:
+ if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
+ return NF_ACCEPT;
+ break;
+ case NFPROTO_IPV4:
+ if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
+ return NF_ACCEPT;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return NF_DROP;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, true);
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+ const struct in6_addr *oldip, *newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = &ipv6h->saddr;
+ newip = &t->src.u3.in6;
+ } else {
+ oldip = &ipv6h->daddr;
+ newip = &t->dst.u3.in6;
+ }
+ inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+ newip->s6_addr32, true);
+#endif
+}
+
+static void nf_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ switch (t->src.l3num) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
+ return;
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
+ return;
+ }
+}
+
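+/* If the packet is not set up for checksum offload, convert it to
+ * CHECKSUM_PARTIAL and seed the checksum field with the pseudo-header
+ * sum so it is completed later; otherwise only the changed length needs
+ * to be folded into the existing checksum.
+ */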
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
+ proto, 0);
+ } else {
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), true);
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+ (data - (void *)skb->data);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto, 0);
+ } else {
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), true);
+ }
+}
+#endif
+
+void nf_nat_csum_recalc(struct sk_buff *skb,
+ u8 nfproto, u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_csum_recalc(skb, proto, data, check,
+ datalen, oldlen);
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_csum_recalc(skb, proto, data, check,
+ datalen, oldlen);
+ return;
+#endif
+ }
+
+ WARN_ON_ONCE(1);
+}
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is the reply direction: XORing with IPS_NAT_MASK
+ * swaps IPS_SRC_NAT and IPS_DST_NAT.
+ */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMP;
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static unsigned int
+nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ state->hook))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ }
+
+ return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
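+ /* DNAT changed the destination: drop any dst cached by early demux
+ * so the packet is re-routed to the new address.
+ */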
+ if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+#ifdef CONFIG_XFRM
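+/* NAT rewrote the packet's addresses, so an earlier xfrm policy lookup
+ * may no longer match; redo it so the packet receives the correct
+ * IPsec processing (or none at all).
+ */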
+static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
+{
+ struct sock *sk = skb->sk;
+ struct dst_entry *dst;
+ unsigned int hh_len;
+ struct flowi fl;
+ int err;
+
+ err = xfrm_decode_session(skb, &fl, family);
+ if (err < 0)
+ return err;
+
+ dst = skb_dst(skb);
+ if (dst->xfrm)
+ dst = ((struct xfrm_dst *)dst)->route;
+ if (!dst_hold_safe(dst))
+ return -EHOSTUNREACH;
+
+ if (sk && !net_eq(net, sock_net(sk)))
+ sk = NULL;
+
+ dst = xfrm_lookup(net, dst, &fl, sk, 0);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ /* Change in oif may mean change in hh_len. */
+ hh_len = skb_dst(skb)->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -ENOMEM;
+ return 0;
+}
+#endif
+
+static unsigned int
+nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ __be32 saddr = ip_hdr(skb)->saddr;
+ struct sock *sk = skb->sk;
+ unsigned int ret;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+
+ if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
+ !inet_sk_transparent(sk))
+ skb_orphan(skb); /* TCP edemux obtained wrong socket */
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
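+ /* Redo the policy lookup if SNAT changed the source address or,
+ * for non-ICMP traffic, the source port.
+ */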
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
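+ /* DNAT changed the destination of a locally generated packet;
+ * the route chosen before this hook is stale, so re-route.
+ */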
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_pre_routing,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_out,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_local_fn,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_local_in,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+};
+
+int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
+ ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
+
+void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ unsigned int hdrlen)
+{
+ struct {
+ struct icmp6hdr icmp6;
+ struct ipv6hdr ip6;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is the reply direction: XORing with IPS_NAT_MASK
+ * swaps IPS_SRC_NAT and IPS_DST_NAT.
+ */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+ &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp6.icmp6_cksum = 0;
+ inside->icmp6.icmp6_cksum =
+ csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ skb->len - hdrlen, IPPROTO_ICMPV6,
+ skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMPV6;
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static unsigned int
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibility to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ state->hook,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ }
+
+ return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+ return ret;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ err = nf_ip6_route_me_harder(state->net, state->sk, skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+
+ return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_in,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_out,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_local_fn,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_fn,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+};
+
+int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
+ ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
+
+void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
+#endif /* CONFIG_IPV6 */
+
+#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
+int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ int ret;
+
+ if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
+ return -EINVAL;
+
+ ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
+ ARRAY_SIZE(nf_nat_ipv6_ops));
+ if (ret)
+ return ret;
+
+ ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
+ ARRAY_SIZE(nf_nat_ipv4_ops));
+ if (ret)
+ nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
+ ARRAY_SIZE(nf_nat_ipv6_ops));
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
+
+void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
+#endif /* NFT INET NAT */
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
new file mode 100644
index 000000000..5b37487d9
--- /dev/null
+++ b/net/netfilter/nf_nat_redirect.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/types.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/addrconf.h>
+#include <net/checksum.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
+
+static unsigned int
+nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ const union nf_inet_addr *newdst)
+{
+ struct nf_nat_range2 newrange;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ memset(&newrange, 0, sizeof(newrange));
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr = *newdst;
+ newrange.max_addr = *newdst;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
+}
+
+unsigned int
+nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ unsigned int hooknum)
+{
+ union nf_inet_addr newdst = {};
+
+ WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+ hooknum != NF_INET_LOCAL_OUT);
+
+ /* Local packets: make them go to loopback */
+ if (hooknum == NF_INET_LOCAL_OUT) {
+ newdst.ip = htonl(INADDR_LOOPBACK);
+ } else {
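+ /* Otherwise redirect to the primary address of the interface
+ * the packet arrived on.
+ */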
+ const struct in_device *indev;
+
+ indev = __in_dev_get_rcu(skb->dev);
+ if (indev) {
+ const struct in_ifaddr *ifa;
+
+ ifa = rcu_dereference(indev->ifa_list);
+ if (ifa)
+ newdst.ip = ifa->ifa_local;
+ }
+
+ if (!newdst.ip)
+ return NF_DROP;
+ }
+
+ return nf_nat_redirect(skb, range, &newdst);
+}
+EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
+
+static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
+
+static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope)
+{
+ unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr);
+
+ if (ifa_addr_type & IPV6_ADDR_MAPPED)
+ return false;
+
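+ /* Addresses still undergoing duplicate address detection are only
+ * usable when optimistic DAD is in effect.
+ */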
+ if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC)))
+ return false;
+
+ if (scope) {
+ unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK;
+
+ if (!(scope & ifa_scope))
+ return false;
+ }
+
+ return true;
+}
+
+unsigned int
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ unsigned int hooknum)
+{
+ union nf_inet_addr newdst = {};
+
+ if (hooknum == NF_INET_LOCAL_OUT) {
+ newdst.in6 = loopback_addr;
+ } else {
+ unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr);
+ struct inet6_dev *idev;
+ bool addr = false;
+
+ idev = __in6_dev_get(skb->dev);
+ if (idev != NULL) {
+ const struct inet6_ifaddr *ifa;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (!nf_nat_redirect_ipv6_usable(ifa, scope))
+ continue;
+
+ newdst.in6 = ifa->addr;
+ addr = true;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+
+ if (!addr)
+ return NF_DROP;
+ }
+
+ return nf_nat_redirect(skb, range, &newdst);
+}
+EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6);
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
new file mode 100644
index 000000000..cf4aeb299
--- /dev/null
+++ b/net/netfilter/nf_nat_sip.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* SIP extension for NAT alteration.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_nat_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008, 2011, 2012 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+#define NAT_HELPER_NAME "sip"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP NAT helper");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_sip =
+ NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
+static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int matchoff, unsigned int matchlen,
+ const char *buffer, unsigned int buflen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct tcphdr *th;
+ unsigned int baseoff;
+
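+ /* matchoff is relative to the start of the SIP message (dataoff);
+ * the mangle helpers expect offsets relative to the transport
+ * payload (baseoff), so shift it by the difference.
+ */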
+ if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+ th = (struct tcphdr *)(skb->data + protoff);
+ baseoff = protoff + th->doff * 4;
+ matchoff += dataoff - baseoff;
+
+ if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ protoff, matchoff, matchlen,
+ buffer, buflen, false))
+ return 0;
+ } else {
+ baseoff = protoff + sizeof(struct udphdr);
+ matchoff += dataoff - baseoff;
+
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+ protoff, matchoff, matchlen,
+ buffer, buflen))
+ return 0;
+ }
+
+ /* Reload data pointer and adjust datalen value */
+ *dptr = skb->data + dataoff;
+ *datalen += buflen - matchlen;
+ return 1;
+}
+
+static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer,
+ const union nf_inet_addr *addr, bool delim)
+{
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return sprintf(buffer, "%pI4", &addr->ip);
+ else {
+ if (delim)
+ return sprintf(buffer, "[%pI6c]", &addr->ip6);
+ else
+ return sprintf(buffer, "%pI6c", &addr->ip6);
+ }
+}
+
+static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer,
+ const union nf_inet_addr *addr, u16 port)
+{
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return sprintf(buffer, "%pI4:%u", &addr->ip, port);
+ else
+ return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port);
+}
+
+static int map_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int matchoff, unsigned int matchlen,
+ union nf_inet_addr *addr, __be16 port)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+ unsigned int buflen;
+ union nf_inet_addr newaddr;
+ __be16 newport;
+
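+ /* Map an embedded address:port to its NATed counterpart: the
+ * original source maps to the reply destination, and the original
+ * destination maps to the reply source.
+ */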
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, addr) &&
+ ct->tuplehash[dir].tuple.src.u.udp.port == port) {
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
+ newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
+ } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
+ ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
+ newaddr = ct->tuplehash[!dir].tuple.src.u3;
+ newport = ct_sip_info->forced_dport ? :
+ ct->tuplehash[!dir].tuple.src.u.udp.port;
+ } else
+ return 1;
+
+ if (nf_inet_addr_cmp(&newaddr, addr) && newport == port)
+ return 1;
+
+ buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport));
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen);
+}
+
+static int map_sip_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ enum sip_header_types type)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchlen, matchoff;
+ union nf_inet_addr addr;
+ __be16 port;
+
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
+ &matchoff, &matchlen, &addr, &port) <= 0)
+ return 1;
+ return map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port);
+}
+
+static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ unsigned int coff, matchoff, matchlen;
+ enum sip_header_types hdr;
+ union nf_inet_addr addr;
+ __be16 port;
+ int request, in_header;
+
+ /* Basic rules: requests and responses. */
+ if (strncasecmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
+ if (ct_sip_parse_request(ct, *dptr, *datalen,
+ &matchoff, &matchlen,
+ &addr, &port) > 0 &&
+ !map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle SIP message");
+ return NF_DROP;
+ }
+ request = 1;
+ } else
+ request = 0;
+
+ if (nf_ct_protonum(ct) == IPPROTO_TCP)
+ hdr = SIP_HDR_VIA_TCP;
+ else
+ hdr = SIP_HDR_VIA_UDP;
+
+ /* Translate topmost Via header and parameters */
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ hdr, NULL, &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ unsigned int olen, matchend, poff, plen, buflen, n;
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+
+ /* We're only interested in headers related to this
+ * connection */
+ if (request) {
+ if (!nf_inet_addr_cmp(&addr,
+ &ct->tuplehash[dir].tuple.src.u3) ||
+ port != ct->tuplehash[dir].tuple.src.u.udp.port)
+ goto next;
+ } else {
+ if (!nf_inet_addr_cmp(&addr,
+ &ct->tuplehash[dir].tuple.dst.u3) ||
+ port != ct->tuplehash[dir].tuple.dst.u.udp.port)
+ goto next;
+ }
+
+ olen = *datalen;
+ if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle Via header");
+ return NF_DROP;
+ }
+
+ matchend = matchoff + matchlen + *datalen - olen;
+
+ /* The maddr= parameter (RFC 3261) specifies where to send
+ * the reply. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "maddr=", &poff, &plen,
+ &addr, true) > 0 &&
+ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) &&
+ !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) {
+ buflen = sip_sprintf_addr(ct, buffer,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ true);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ poff, plen, buffer, buflen)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle maddr");
+ return NF_DROP;
+ }
+ }
+
+ /* The received= parameter (RFC 3261) contains the address
+ * from which the server received the request. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "received=", &poff, &plen,
+ &addr, false) > 0 &&
+ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) &&
+ !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) {
+ buflen = sip_sprintf_addr(ct, buffer,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ false);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ poff, plen, buffer, buflen)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle received");
+ return NF_DROP;
+ }
+ }
+
+ /* The rport= parameter (RFC 3581) contains the port number
+ * from which the server received the request. */
+ if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
+ "rport=", &poff, &plen,
+ &n) > 0 &&
+ htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
+ htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
+ __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
+ buflen = sprintf(buffer, "%u", ntohs(p));
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ poff, plen, buffer, buflen)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle rport");
+ return NF_DROP;
+ }
+ }
+ }
+
+next:
+ /* Translate Contact headers */
+ coff = 0;
+ in_header = 0;
+ while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
+ SIP_HDR_CONTACT, &in_header,
+ &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen,
+ &addr, port)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle contact");
+ return NF_DROP;
+ }
+ }
+
+ if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) ||
+ !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle SIP from/to");
+ return NF_DROP;
+ }
+
+ /* Mangle destination port for Cisco phones, then fix up checksums */
+ if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
+ struct udphdr *uh;
+
+ if (skb_ensure_writable(skb, skb->len)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ return NF_DROP;
+ }
+
+ uh = (void *)skb->data + protoff;
+ uh->dest = ct_sip_info->forced_dport;
+
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff,
+ 0, 0, NULL, 0)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ return NF_DROP;
+ }
+ }
+
+ return NF_ACCEPT;
+}
+
+static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
+ s16 off)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ const struct tcphdr *th;
+
+ if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
+ return;
+
+ th = (struct tcphdr *)(skb->data + protoff);
+ nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
+}
+
+/* Handles expected signalling connections and media streams */
+static void nf_nat_sip_expected(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_conn_help *help = nfct_help(ct->master);
+ struct nf_conntrack_expect *pair_exp;
+ int range_set_for_snat = 0;
+ struct nf_nat_range2 range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = exp->saved_proto;
+ range.min_addr = range.max_addr = exp->saved_addr;
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+
+ /* Do SRC manip on media streams according to the parameters
+ * found in the paired expectation.
+ */
+ if (exp->class != SIP_EXPECT_SIGNALLING) {
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry(pair_exp, &help->expectations, lnode) {
+ if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) &&
+ pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum &&
+ nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) &&
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) {
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all;
+ range.min_addr = range.max_addr = pair_exp->tuple.dst.u3;
+ range_set_for_snat = 1;
+ break;
+ }
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ }
+
+ /* When no paired expectation has been found, change src to
+ * where master sends to, but only if the connection actually came
+ * from the same source.
+ */
+ if (!range_set_for_snat &&
+ nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+ &ct->master->tuplehash[exp->dir].tuple.src.u3)) {
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ range_set_for_snat = 1;
+ }
+
+ /* Perform SRC manip. */
+ if (range_set_for_snat)
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ struct nf_conntrack_expect *exp,
+ unsigned int matchoff,
+ unsigned int matchlen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+ union nf_inet_addr newaddr;
+ u_int16_t port;
+ __be16 srcport;
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+ unsigned int buflen;
+
+ /* Connection will come from reply */
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3))
+ newaddr = exp->tuple.dst.u3;
+ else
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
+
+ /* If the signalling port matches the connection's source port in the
+ * original direction, try to use the destination port in the opposite
+ * direction. */
+ srcport = ct_sip_info->forced_dport ? :
+ ct->tuplehash[dir].tuple.src.u.udp.port;
+ if (exp->tuple.dst.u.udp.port == srcport)
+ port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
+ else
+ port = ntohs(exp->tuple.dst.u.udp.port);
+
+ exp->saved_addr = exp->tuple.dst.u3;
+ exp->tuple.dst.u3 = newaddr;
+ exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
+ exp->dir = !dir;
+ exp->expectfn = nf_nat_sip_expected;
+
+ port = nf_nat_exp_find_port(exp, port);
+ if (port == 0) {
+ nf_ct_helper_log(skb, ct, "all ports in use for SIP");
+ return NF_DROP;
+ }
+
+ if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) ||
+ exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
+ buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ goto err;
+ }
+ }
+ return NF_ACCEPT;
+
+err:
+ nf_ct_unexpect_related(exp);
+ return NF_DROP;
+}
+
+static int mangle_content_len(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchoff, matchlen;
+ char buffer[sizeof("65536")];
+ int buflen, c_len;
+
+ /* Get actual SDP length */
+ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+ SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+ &matchoff, &matchlen) <= 0)
+ return 0;
+ c_len = *datalen - matchoff + strlen("v=");
+
+ /* Now, update SDP length */
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
+ &matchoff, &matchlen) <= 0)
+ return 0;
+
+ buflen = sprintf(buffer, "%u", c_len);
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen);
+}
+
+static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int sdpoff,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ char *buffer, int buflen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchlen, matchoff;
+
+ if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
+ &matchoff, &matchlen) <= 0)
+ return -ENOENT;
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL;
+}
+
+static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int sdpoff,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ const union nf_inet_addr *addr)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ char buffer[INET6_ADDRSTRLEN];
+ unsigned int buflen;
+
+ buflen = sip_sprintf_addr(ct, buffer, addr, false);
+ if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen,
+ sdpoff, type, term, buffer, buflen))
+ return 0;
+
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ u_int16_t port)
+{
+ char buffer[sizeof("nnnnn")];
+ unsigned int buflen;
+
+ buflen = sprintf(buffer, "%u", port);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen))
+ return 0;
+
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int sdpoff,
+ const union nf_inet_addr *addr)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ char buffer[INET6_ADDRSTRLEN];
+ unsigned int buflen;
+
+ /* Mangle session description owner and contact addresses */
+ buflen = sip_sprintf_addr(ct, buffer, addr, false);
+ if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+ SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen))
+ return 0;
+
+ switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+ buffer, buflen)) {
+ case 0:
+ /*
+ * RFC 2327:
+ *
+ * Session description
+ *
+ * c=* (connection information - not required if included in all media)
+ */
+ case -ENOENT:
+ break;
+ default:
+ return 0;
+ }
+
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+/* So, this packet has hit the connection tracking matching code.
+ * Mangle it, and change the expectation to match the new version.
+ */
+static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp,
+ unsigned int mediaoff,
+ unsigned int medialen,
+ union nf_inet_addr *rtp_addr)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ u_int16_t port;
+
+ /* Connection will come from reply */
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3))
+ *rtp_addr = rtp_exp->tuple.dst.u3;
+ else
+ *rtp_addr = ct->tuplehash[!dir].tuple.dst.u3;
+
+ rtp_exp->saved_addr = rtp_exp->tuple.dst.u3;
+ rtp_exp->tuple.dst.u3 = *rtp_addr;
+ rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+ rtp_exp->dir = !dir;
+ rtp_exp->expectfn = nf_nat_sip_expected;
+
+ rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3;
+ rtcp_exp->tuple.dst.u3 = *rtp_addr;
+ rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+ rtcp_exp->dir = !dir;
+ rtcp_exp->expectfn = nf_nat_sip_expected;
+
+ /* Try to keep the same pair of adjacent ports (RTP and RTP+1 for
+ * RTCP); if they are taken, probe upwards in steps of two.
+ */
+ for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+ port != 0; port += 2) {
+ int ret;
+
+ rtp_exp->tuple.dst.u.udp.port = htons(port);
+ ret = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
+ if (ret == -EBUSY)
+ continue;
+ else if (ret < 0) {
+ port = 0;
+ break;
+ }
+ rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
+ ret = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
+ if (ret == 0)
+ break;
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
+ nf_ct_unexpect_related(rtp_exp);
+ port = 0;
+ break;
+ }
+ }
+
+ if (port == 0) {
+ nf_ct_helper_log(skb, ct, "all ports in use for SDP media");
+ goto err1;
+ }
+
+ /* Update media port. */
+ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
+ !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+ mediaoff, medialen, port)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle SDP message");
+ goto err2;
+ }
+
+ return NF_ACCEPT;
+
+err2:
+ nf_ct_unexpect_related(rtp_exp);
+ nf_ct_unexpect_related(rtcp_exp);
+err1:
+ return NF_DROP;
+}
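+
+/* RTP conventionally uses an even port with RTCP on the next odd one
+ * (RFC 3550), which is why the loop above advances in steps of two and
+ * registers the rtp/rtcp expectations as a pair - e.g. if 16384/16385
+ * are busy, the next attempt is 16386/16387. Both expectations use
+ * nf_nat_sip_expected() so the media flows are NATed consistently with
+ * the signalling connection.
+ */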
+
+static struct nf_ct_helper_expectfn sip_nat = {
+ .name = "sip",
+ .expectfn = nf_nat_sip_expected,
+};
+
+static void __exit nf_nat_sip_fini(void)
+{
+ nf_nat_helper_unregister(&nat_helper_sip);
+ RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
+ nf_ct_helper_expectfn_unregister(&sip_nat);
+ synchronize_rcu();
+}
+
+static const struct nf_nat_sip_hooks sip_hooks = {
+ .msg = nf_nat_sip,
+ .seq_adjust = nf_nat_sip_seq_adjust,
+ .expect = nf_nat_sip_expect,
+ .sdp_addr = nf_nat_sdp_addr,
+ .sdp_port = nf_nat_sdp_port,
+ .sdp_session = nf_nat_sdp_session,
+ .sdp_media = nf_nat_sdp_media,
+};
+
+static int __init nf_nat_sip_init(void)
+{
+ BUG_ON(nf_nat_sip_hooks != NULL);
+ nf_nat_helper_register(&nat_helper_sip);
+ RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
+ nf_ct_helper_expectfn_register(&sip_nat);
+ return 0;
+}
+
+module_init(nf_nat_sip_init);
+module_exit(nf_nat_sip_fini);
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
new file mode 100644
index 000000000..1a591132d
--- /dev/null
+++ b/net/netfilter/nf_nat_tftp.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ */
+
+#include <linux/module.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+#define NAT_HELPER_NAME "tftp"
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME);
+
+static struct nf_conntrack_nat_helper nat_helper_tftp =
+ NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME);
+
+static unsigned int help(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_expect *exp)
+{
+ const struct nf_conn *ct = exp->master;
+
+ exp->saved_proto.udp.port
+ = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+ exp->expectfn = nf_nat_follow_master;
+ if (nf_ct_expect_related(exp, 0) != 0) {
+ nf_ct_helper_log(skb, exp->master, "cannot add expectation");
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+static void __exit nf_nat_tftp_fini(void)
+{
+ nf_nat_helper_unregister(&nat_helper_tftp);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, NULL);
+ synchronize_rcu();
+}
+
+static int __init nf_nat_tftp_init(void)
+{
+ BUG_ON(nf_nat_tftp_hook != NULL);
+ nf_nat_helper_register(&nat_helper_tftp);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, help);
+ return 0;
+}
+
+module_init(nf_nat_tftp_init);
+module_exit(nf_nat_tftp_fini);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
new file mode 100644
index 000000000..e2f334f70
--- /dev/null
+++ b/net/netfilter/nf_queue.c
@@ -0,0 +1,356 @@
+/*
+ * Rusty Russell (C)2000 -- This code is GPL.
+ * Patrick McHardy (c) 2006-2012
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
+
+#include "nf_internals.h"
+
+static const struct nf_queue_handler __rcu *nf_queue_handler;
+
+/*
+ * Hook for nfnetlink_queue to register its queue handler.
+ * We do this so that most of the NFQUEUE code can be modular.
+ *
+ * Once the queue is registered it must reinject all packets it
+ * receives, no matter what.
+ */
+
+void nf_register_queue_handler(const struct nf_queue_handler *qh)
+{
+ /* should never happen, we only have one queueing backend in the kernel */
+ WARN_ON(rcu_access_pointer(nf_queue_handler));
+ rcu_assign_pointer(nf_queue_handler, qh);
+}
+EXPORT_SYMBOL(nf_register_queue_handler);
+
+/* The caller must flush their queue before this */
+void nf_unregister_queue_handler(void)
+{
+ RCU_INIT_POINTER(nf_queue_handler, NULL);
+}
+EXPORT_SYMBOL(nf_unregister_queue_handler);
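+
+/* This follows the usual RCU publish pattern: the backend is made
+ * visible with rcu_assign_pointer() and readers fetch it with
+ * rcu_dereference() inside an RCU read-side section (see
+ * nf_queue_nf_hook_drop() and __nf_queue() below), so registration
+ * never blocks packet processing.
+ */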
+
+static void nf_queue_sock_put(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ sock_gen_put(sk);
+#else
+ sock_put(sk);
+#endif
+}
+
+static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+{
+ struct nf_hook_state *state = &entry->state;
+
+ /* Release those devices we held, or Alexey will kill me. */
+ dev_put(state->in);
+ dev_put(state->out);
+ if (state->sk)
+ nf_queue_sock_put(state->sk);
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_put(entry->physin);
+ dev_put(entry->physout);
+#endif
+}
+
+void nf_queue_entry_free(struct nf_queue_entry *entry)
+{
+ nf_queue_entry_release_refs(entry);
+ kfree(entry);
+}
+EXPORT_SYMBOL_GPL(nf_queue_entry_free);
+
+static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ const struct sk_buff *skb = entry->skb;
+
+ if (nf_bridge_info_exists(skb)) {
+ entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
+ entry->physout = nf_bridge_get_physoutdev(skb);
+ } else {
+ entry->physin = NULL;
+ entry->physout = NULL;
+ }
+#endif
+}
+
+/* Bump dev refs so they don't vanish while packet is out */
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+{
+ struct nf_hook_state *state = &entry->state;
+
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
+ dev_hold(state->in);
+ dev_hold(state->out);
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_hold(entry->physin);
+ dev_hold(entry->physout);
+#endif
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
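+
+/* Must stay symmetric with nf_queue_entry_release_refs(): every
+ * dev_hold() taken here is paired with a dev_put() on release. The
+ * socket uses refcount_inc_not_zero() so that queueing a packet never
+ * resurrects a socket that is already on its way to destruction.
+ */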
+
+void nf_queue_nf_hook_drop(struct net *net)
+{
+ const struct nf_queue_handler *qh;
+
+ rcu_read_lock();
+ qh = rcu_dereference(nf_queue_handler);
+ if (qh)
+ qh->nf_hook_drop(net);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+
+static void nf_ip_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ rt_info->tos = iph->tos;
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+ unsigned int index, unsigned int queuenum)
+{
+ struct nf_queue_entry *entry = NULL;
+ const struct nf_queue_handler *qh;
+ unsigned int route_key_size;
+ int status;
+
+ /* QUEUE == DROP if no one is waiting, to be safe. */
+ qh = rcu_dereference(nf_queue_handler);
+ if (!qh)
+ return -ESRCH;
+
+ switch (state->pf) {
+ case AF_INET:
+ route_key_size = sizeof(struct ip_rt_info);
+ break;
+ case AF_INET6:
+ route_key_size = sizeof(struct ip6_rt_info);
+ break;
+ default:
+ route_key_size = 0;
+ break;
+ }
+
+ if (skb_sk_is_prefetched(skb)) {
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk)) {
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ return -ENOTCONN;
+
+ /* drop refcount on skb_orphan */
+ skb->destructor = sock_edemux;
+ }
+ }
+
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+ if (skb_dst(skb) && !skb_dst_force(skb)) {
+ kfree(entry);
+ return -ENETDOWN;
+ }
+
+ *entry = (struct nf_queue_entry) {
+ .skb = skb,
+ .state = *state,
+ .hook_index = index,
+ .size = sizeof(*entry) + route_key_size,
+ };
+
+ __nf_queue_entry_init_physdevs(entry);
+
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ nf_ip_saveroute(skb, entry);
+ break;
+ case AF_INET6:
+ nf_ip6_saveroute(skb, entry);
+ break;
+ }
+
+ status = qh->outfn(entry, queuenum);
+ if (status < 0) {
+ nf_queue_entry_free(entry);
+ return status;
+ }
+
+ return 0;
+}
+
+/* Packets leaving via this function must come back through nf_reinject(). */
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ unsigned int index, unsigned int verdict)
+{
+ int ret;
+
+ ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS);
+ if (ret < 0) {
+ if (ret == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ return 1;
+ kfree_skb(skb);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_queue);
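+
+/* The target queue number is carried in the upper bits of the verdict
+ * word, hence "verdict >> NF_VERDICT_QBITS" above. If no userspace
+ * listener is attached (-ESRCH), a verdict carrying
+ * NF_VERDICT_FLAG_QUEUE_BYPASS lets the packet continue hook traversal
+ * (return value 1) instead of being dropped.
+ */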
+
+static unsigned int nf_iterate(struct sk_buff *skb,
+ struct nf_hook_state *state,
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
+{
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
+
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
+repeat:
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
+ if (verdict != NF_ACCEPT) {
+ *index = i;
+ if (verdict != NF_REPEAT)
+ return verdict;
+ goto repeat;
+ }
+ i++;
+ }
+
+ *index = i;
+ return NF_ACCEPT;
+}
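+
+/* NF_REPEAT re-runs the same hook function instead of advancing, and
+ * *index is written back on every non-accept verdict so the position
+ * is preserved in the queue entry and nf_reinject() can resume the
+ * walk exactly where it stopped.
+ */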
+
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+ switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+}
+
+/* Caller must hold rcu read-side lock */
+void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+{
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
+ struct sk_buff *skb = entry->skb;
+ const struct net *net;
+ unsigned int i;
+ int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
+
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ nf_queue_entry_free(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
+ /* Continue traversal iff userspace said ok... */
+ if (verdict == NF_REPEAT)
+ verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
+
+ if (verdict == NF_ACCEPT) {
+ if (nf_reroute(skb, entry) < 0)
+ verdict = NF_DROP;
+ }
+
+ if (verdict == NF_ACCEPT) {
+next_hook:
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
+ }
+
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_STOP:
+ local_bh_disable();
+ entry->state.okfn(entry->state.net, entry->state.sk, skb);
+ local_bh_enable();
+ break;
+ case NF_QUEUE:
+ err = nf_queue(skb, &entry->state, i, verdict);
+ if (err == 1)
+ goto next_hook;
+ break;
+ case NF_STOLEN:
+ break;
+ default:
+ kfree_skb(skb);
+ }
+
+ nf_queue_entry_free(entry);
+}
+EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
new file mode 100644
index 000000000..34afcd03b
--- /dev/null
+++ b/net/netfilter/nf_sockopt.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/mutex.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+/* Sockopts are only registered and called from user context, so
+ net locking would be overkill. Also, [gs]etsockopt calls may
+ sleep. */
+static DEFINE_MUTEX(nf_sockopt_mutex);
+static LIST_HEAD(nf_sockopts);
+
+/* Do exclusive ranges overlap? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+ return max1 > min2 && min1 < max2;
+}
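+
+/* The ranges are half-open, matching the "val < optmax" tests in
+ * nf_sockopt_find() below: overlap(64, 67, 66, 70) is true
+ * (67 > 66 && 64 < 70), while overlap(64, 66, 66, 70) is false, so
+ * one registration's optmax may equal another's optmin.
+ */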
+
+/* Functions to register sockopt ranges (exclusive). */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+ struct nf_sockopt_ops *ops;
+ int ret = 0;
+
+ mutex_lock(&nf_sockopt_mutex);
+ list_for_each_entry(ops, &nf_sockopts, list) {
+ if (ops->pf == reg->pf
+ && (overlap(ops->set_optmin, ops->set_optmax,
+ reg->set_optmin, reg->set_optmax)
+ || overlap(ops->get_optmin, ops->get_optmax,
+ reg->get_optmin, reg->get_optmax))) {
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ ops->set_optmin, ops->set_optmax,
+ ops->get_optmin, ops->get_optmax,
+ reg->set_optmin, reg->set_optmax,
+ reg->get_optmin, reg->get_optmax);
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_add(&reg->list, &nf_sockopts);
+out:
+ mutex_unlock(&nf_sockopt_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(nf_register_sockopt);
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+ mutex_lock(&nf_sockopt_mutex);
+ list_del(&reg->list);
+ mutex_unlock(&nf_sockopt_mutex);
+}
+EXPORT_SYMBOL(nf_unregister_sockopt);
+
+static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
+ int val, int get)
+{
+ struct nf_sockopt_ops *ops;
+
+ mutex_lock(&nf_sockopt_mutex);
+ list_for_each_entry(ops, &nf_sockopts, list) {
+ if (ops->pf == pf) {
+ if (!try_module_get(ops->owner))
+ goto out_nosup;
+
+ if (get) {
+ if (val >= ops->get_optmin &&
+ val < ops->get_optmax)
+ goto out;
+ } else {
+ if (val >= ops->set_optmin &&
+ val < ops->set_optmax)
+ goto out;
+ }
+ module_put(ops->owner);
+ }
+ }
+out_nosup:
+ ops = ERR_PTR(-ENOPROTOOPT);
+out:
+ mutex_unlock(&nf_sockopt_mutex);
+ return ops;
+}
+
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
+ unsigned int len)
+{
+ struct nf_sockopt_ops *ops;
+ int ret;
+
+ ops = nf_sockopt_find(sk, pf, val, 0);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ ret = ops->set(sk, val, opt, len);
+ module_put(ops->owner);
+ return ret;
+}
+EXPORT_SYMBOL(nf_setsockopt);
+
+int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+ int *len)
+{
+ struct nf_sockopt_ops *ops;
+ int ret;
+
+ ops = nf_sockopt_find(sk, pf, val, 1);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ ret = ops->get(sk, val, opt, len);
+ module_put(ops->owner);
+ return ret;
+}
+EXPORT_SYMBOL(nf_getsockopt);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 000000000..16915f8ee
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,1220 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <asm/unaligned.h>
+#include <net/tcp.h>
+#include <net/netns/generic.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_synproxy.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_synproxy.h>
+
+unsigned int synproxy_net_id;
+EXPORT_SYMBOL_GPL(synproxy_net_id);
+
+bool
+synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+ const struct tcphdr *th, struct synproxy_options *opts)
+{
+ int length = (th->doff * 4) - sizeof(*th);
+ u8 buf[40], *ptr;
+
+ if (unlikely(length < 0))
+ return false;
+
+ ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+ if (ptr == NULL)
+ return false;
+
+ opts->options = 0;
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return true;
+ case TCPOPT_NOP:
+ length--;
+ continue;
+ default:
+ if (length < 2)
+ return true;
+ opsize = *ptr++;
+ if (opsize < 2)
+ return true;
+ if (opsize > length)
+ return true;
+
+ switch (opcode) {
+ case TCPOPT_MSS:
+ if (opsize == TCPOLEN_MSS) {
+ opts->mss_option = get_unaligned_be16(ptr);
+ opts->options |= NF_SYNPROXY_OPT_MSS;
+ }
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW) {
+ opts->wscale = *ptr;
+ if (opts->wscale > TCP_MAX_WSCALE)
+ opts->wscale = TCP_MAX_WSCALE;
+ opts->options |= NF_SYNPROXY_OPT_WSCALE;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ if (opsize == TCPOLEN_TIMESTAMP) {
+ opts->tsval = get_unaligned_be32(ptr);
+ opts->tsecr = get_unaligned_be32(ptr + 4);
+ opts->options |= NF_SYNPROXY_OPT_TIMESTAMP;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM)
+ opts->options |= NF_SYNPROXY_OPT_SACK_PERM;
+ break;
+ }
+
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+ return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_parse_options);
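+
+/* TCP options are (kind, length, data) triples, except the one-byte
+ * EOL and NOP kinds handled before the default case. The opsize
+ * checks make the parser bail out early on truncated or malformed
+ * options rather than read past the 40-byte option buffer.
+ */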
+
+static unsigned int
+synproxy_options_size(const struct synproxy_options *opts)
+{
+ unsigned int size = 0;
+
+ if (opts->options & NF_SYNPROXY_OPT_MSS)
+ size += TCPOLEN_MSS_ALIGNED;
+ if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ else if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
+ size += TCPOLEN_SACKPERM_ALIGNED;
+ if (opts->options & NF_SYNPROXY_OPT_WSCALE)
+ size += TCPOLEN_WSCALE_ALIGNED;
+
+ return size;
+}
+
+static void
+synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
+{
+ __be32 *ptr = (__be32 *)(th + 1);
+ u8 options = opts->options;
+
+ if (options & NF_SYNPROXY_OPT_MSS)
+ *ptr++ = htonl((TCPOPT_MSS << 24) |
+ (TCPOLEN_MSS << 16) |
+ opts->mss_option);
+
+ if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ if (options & NF_SYNPROXY_OPT_SACK_PERM)
+ *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+
+ *ptr++ = htonl(opts->tsval);
+ *ptr++ = htonl(opts->tsecr);
+ } else if (options & NF_SYNPROXY_OPT_SACK_PERM)
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_SACK_PERM << 8) |
+ TCPOLEN_SACK_PERM);
+
+ if (options & NF_SYNPROXY_OPT_WSCALE)
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ opts->wscale);
+}
+
+void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
+ struct synproxy_options *opts)
+{
+ opts->tsecr = opts->tsval;
+ opts->tsval = tcp_time_stamp_raw() & ~0x3f;
+
+ if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
+ opts->tsval |= opts->wscale;
+ opts->wscale = info->wscale;
+ } else
+ opts->tsval |= 0xf;
+
+ if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
+ opts->tsval |= 1 << 4;
+
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ opts->tsval |= 1 << 5;
+}
+EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
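+
+/* The low six bits of the generated tsval are masked off above and
+ * reused to encode connection state, which synproxy_check_timestamp_cookie()
+ * recovers from the echoed tsecr:
+ *
+ *   bits 3..0  peer window scale (0xf when no WSCALE was offered)
+ *   bit  4     SACK permitted
+ *   bit  5     ECN
+ */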
+
+static void
+synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+{
+ opts->wscale = opts->tsecr & 0xf;
+ if (opts->wscale != 0xf)
+ opts->options |= NF_SYNPROXY_OPT_WSCALE;
+
+ opts->options |= opts->tsecr & (1 << 4) ? NF_SYNPROXY_OPT_SACK_PERM : 0;
+
+ opts->options |= opts->tsecr & (1 << 5) ? NF_SYNPROXY_OPT_ECN : 0;
+}
+
+static unsigned int
+synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
+ struct tcphdr *th, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_conn_synproxy *synproxy)
+{
+ unsigned int optoff, optend;
+ __be32 *ptr, old;
+
+ if (synproxy->tsoff == 0)
+ return 1;
+
+ optoff = protoff + sizeof(struct tcphdr);
+ optend = protoff + th->doff * 4;
+
+ if (skb_ensure_writable(skb, optend))
+ return 0;
+
+ while (optoff < optend) {
+ unsigned char *op = skb->data + optoff;
+
+ switch (op[0]) {
+ case TCPOPT_EOL:
+ return 1;
+ case TCPOPT_NOP:
+ optoff++;
+ continue;
+ default:
+ if (optoff + 1 == optend ||
+ optoff + op[1] > optend ||
+ op[1] < 2)
+ return 0;
+ if (op[0] == TCPOPT_TIMESTAMP &&
+ op[1] == TCPOLEN_TIMESTAMP) {
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+ ptr = (__be32 *)&op[2];
+ old = *ptr;
+ *ptr = htonl(ntohl(*ptr) -
+ synproxy->tsoff);
+ } else {
+ ptr = (__be32 *)&op[6];
+ old = *ptr;
+ *ptr = htonl(ntohl(*ptr) +
+ synproxy->tsoff);
+ }
+ inet_proto_csum_replace4(&th->check, skb,
+ old, *ptr, false);
+ return 1;
+ }
+ optoff += op[1];
+ }
+ }
+ return 1;
+}
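+
+/* tsoff is the delta between the timestamps we generated while
+ * answering with cookies and those the real server chose once the
+ * handshake was relayed (see the SYN_RECV handling in the hooks
+ * below). Server replies get tsoff subtracted from TSval (op[2]),
+ * client packets get tsoff added to TSecr (op[6]), so each peer keeps
+ * seeing one consistent timestamp clock.
+ */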
+
+#ifdef CONFIG_PROC_FS
+static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(snet->stats, cpu);
+ }
+
+ return NULL;
+}
+
+static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(snet->stats, cpu);
+ }
+ (*pos)++;
+ return NULL;
+}
+
+static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+ return;
+}
+
+static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ struct synproxy_stats *stats = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "entries\t\tsyn_received\t"
+ "cookie_invalid\tcookie_valid\t"
+ "cookie_retrans\tconn_reopened\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
+ stats->syn_received,
+ stats->cookie_invalid,
+ stats->cookie_valid,
+ stats->cookie_retrans,
+ stats->conn_reopened);
+
+ return 0;
+}
+
+static const struct seq_operations synproxy_cpu_seq_ops = {
+ .start = synproxy_cpu_seq_start,
+ .next = synproxy_cpu_seq_next,
+ .stop = synproxy_cpu_seq_stop,
+ .show = synproxy_cpu_seq_show,
+};
+
+static int __net_init synproxy_proc_init(struct net *net)
+{
+ if (!proc_create_net("synproxy", 0444, net->proc_net_stat,
+ &synproxy_cpu_seq_ops, sizeof(struct seq_net_private)))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+ remove_proc_entry("synproxy", net->proc_net_stat);
+}
+#else
+static int __net_init synproxy_proc_init(struct net *net)
+{
+ return 0;
+}
+
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+ return;
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __net_init synproxy_net_init(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct nf_conn *ct;
+ int err = -ENOMEM;
+
+ ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
+ if (!ct)
+ goto err1;
+
+ if (!nfct_seqadj_ext_add(ct))
+ goto err2;
+ if (!nfct_synproxy_ext_add(ct))
+ goto err2;
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ snet->tmpl = ct;
+
+ snet->stats = alloc_percpu(struct synproxy_stats);
+ if (snet->stats == NULL)
+ goto err2;
+
+ err = synproxy_proc_init(net);
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+err3:
+ free_percpu(snet->stats);
+err2:
+ nf_ct_tmpl_free(ct);
+err1:
+ return err;
+}
+
+static void __net_exit synproxy_net_exit(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+
+ nf_ct_put(snet->tmpl);
+ synproxy_proc_exit(net);
+ free_percpu(snet->stats);
+}
+
+static struct pernet_operations synproxy_net_ops = {
+ .init = synproxy_net_init,
+ .exit = synproxy_net_exit,
+ .id = &synproxy_net_id,
+ .size = sizeof(struct synproxy_net),
+};
+
+static int __init synproxy_core_init(void)
+{
+ return register_pernet_subsys(&synproxy_net_ops);
+}
+
+static void __exit synproxy_core_exit(void)
+{
+ unregister_pernet_subsys(&synproxy_net_ops);
+}
+
+module_init(synproxy_core_init);
+module_exit(synproxy_core_exit);
+
+static struct iphdr *
+synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
+ __be32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(struct net *net,
+ const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(net, nskb->sk, nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
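+
+/* Only the pseudo-header sum is computed here (tcp_v4_check() over the
+ * addresses and length with a zero payload sum, stored inverted);
+ * CHECKSUM_PARTIAL with csum_start/csum_offset tells the stack or NIC
+ * to fold in the remainder on transmit.
+ */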
+
+void
+synproxy_send_client_synack(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss_encode;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
+ IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack);
+
+static void
+synproxy_send_server_syn(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(struct net *net,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
+ IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
+}
+
+bool
+synproxy_recv_client_ack(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss_option = mss;
+ opts->options |= NF_SYNPROXY_OPT_MSS;
+
+ if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(net, skb, th, opts, recv_seq);
+ return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack);
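+
+/* The client echoes our SYN cookie as its acknowledgment number, so
+ * ack_seq - 1 is the cookie itself; __cookie_v4_check() returns the
+ * MSS that was encoded into it, or 0 if the cookie does not validate.
+ */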
+
+unsigned int
+ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *nhs)
+{
+ struct net *net = nhs->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (!synproxy)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb) ||
+ ip_hdr(skb)->protocol != IPPROTO_TCP)
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (!th)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments; they will be reinitialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+ fallthrough;
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match that of the first SYN.
+ */
+ if (synproxy_recv_client_ack(net, skb, th, &opts,
+ ntohl(th->seq) + 1)) {
+ this_cpu_inc(snet->stats->cookie_retrans);
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
+
+ opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+ NF_SYNPROXY_OPT_WSCALE |
+ NF_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(net, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(net, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv4_synproxy_hook);
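+
+/* Rough shape of the exchange handled above; the initial SYN/ACK
+ * (carrying the cookie, with a zero window) is emitted by the
+ * SYNPROXY target itself via synproxy_send_client_synack():
+ *
+ *   client                 synproxy                server
+ *     SYN           ->
+ *                   <-     SYN/ACK (cookie, win 0)
+ *     ACK (cookie)  ->     SYN              ->
+ *                                           <-     SYN/ACK
+ *                          ACK              ->
+ *                   <-     ACK (window update)
+ *
+ * From then on only sequence number and timestamp adjustment remain.
+ */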
+
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
+{
+ int err;
+
+ if (snet->hook_ref4 == 0) {
+ err = nf_register_net_hooks(net, ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err)
+ return err;
+ }
+
+ snet->hook_ref4++;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);
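+
+/* hook_ref4 is a plain per-netns use count: the hooks are installed for
+ * the first synproxy user in a namespace and removed when the last one
+ * goes away. The counter is not atomic, so callers are expected to
+ * serialize the init/fini paths.
+ */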
+
+void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
+{
+ snet->hook_ref4--;
+ if (snet->hook_ref4 == 0)
+ nf_unregister_net_hooks(net, ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct ipv6hdr *
+synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ struct ipv6hdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = skb_put(skb, sizeof(*iph));
+ ip6_flow_hdr(iph, 0, 0);
+ iph->hop_limit = net->ipv6.devconf_all->hop_limit;
+ iph->nexthdr = IPPROTO_TCP;
+ iph->saddr = *saddr;
+ iph->daddr = *daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp_ipv6(struct net *net,
+ const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct ipv6hdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ int err;
+
+ nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = niph->saddr;
+ fl6.daddr = niph->daddr;
+ fl6.fl6_sport = nth->source;
+ fl6.fl6_dport = nth->dest;
+ security_skb_classify_flow((struct sk_buff *)skb,
+ flowi6_to_flowi_common(&fl6));
+ err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
+ if (err)
+ goto free_nskb;
+
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ goto free_nskb;
+
+ skb_dst_set(nskb, dst);
+
+ if (nfct) {
+ nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+ nf_conntrack_get(nfct);
+ }
+
+ ip6_local_out(net, nskb->sk, nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+void
+synproxy_send_client_synack_ipv6(struct net *net,
+ const struct sk_buff *skb,
+ const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss_encode;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(nf_ipv6_cookie_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
+ IP_CT_ESTABLISHED_REPLY, niph, nth,
+ tcp_hdr_size);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack_ipv6);
+
+static void
+synproxy_send_server_syn_ipv6(struct net *net, const struct sk_buff *skb,
+ const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp_ipv6(net, skb, nskb, &snet->tmpl->ct_general,
+ IP_CT_NEW, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack_ipv6(struct net *net, const struct ip_ct_tcp *state,
+ const struct sk_buff *skb,
+ const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp_ipv6(net, skb, nskb, NULL, 0, niph, nth,
+ tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
+ const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
+ IP_CT_ESTABLISHED_REPLY, niph, nth,
+ tcp_hdr_size);
+}
+
+bool
+synproxy_recv_client_ack_ipv6(struct net *net,
+ const struct sk_buff *skb,
+ const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ int mss;
+
+ mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss_option = mss;
+ opts->options |= NF_SYNPROXY_OPT_MSS;
+
+ if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn_ipv6(net, skb, th, opts, recv_seq);
+ return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack_ipv6);
+
+unsigned int
+ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *nhs)
+{
+ struct net *net = nhs->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ __be16 frag_off;
+ u8 nexthdr;
+ int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (!synproxy)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (thoff < 0 || nexthdr != IPPROTO_TCP)
+ return NF_ACCEPT;
+
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (!th)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments; they will be reinitialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+ fallthrough;
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match that of the first SYN.
+ */
+ if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
+ ntohl(th->seq) + 1)) {
+ this_cpu_inc(snet->stats->cookie_retrans);
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
+
+ opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+ NF_SYNPROXY_OPT_WSCALE |
+ NF_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack_ipv6(net, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack_ipv6(net, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv6_synproxy_hook);
+
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
+ {
+ .hook = ipv6_synproxy_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv6_synproxy_hook,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+int
+nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net)
+{
+ int err;
+
+ if (snet->hook_ref6 == 0) {
+ err = nf_register_net_hooks(net, ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+ if (err)
+ return err;
+ }
+
+ snet->hook_ref6++;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init);
+
+void
+nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net)
+{
+ snet->hook_ref6--;
+ if (snet->hook_ref6 == 0)
+ nf_unregister_net_hooks(net, ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
+#endif /* CONFIG_IPV6 */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 000000000..1edb21382
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,10973 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
+#include <linux/audit.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
+
+unsigned int nf_tables_net_id __read_mostly;
+
+static LIST_HEAD(nf_tables_expressions);
+static LIST_HEAD(nf_tables_objects);
+static LIST_HEAD(nf_tables_flowtables);
+static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
+static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
+
+enum {
+ NFT_VALIDATE_SKIP = 0,
+ NFT_VALIDATE_NEED,
+ NFT_VALIDATE_DO,
+};
+
+static struct rhltable nft_objname_ht;
+
+static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
+static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
+static const struct rhashtable_params nft_chain_ht_params = {
+ .head_offset = offsetof(struct nft_chain, rhlhead),
+ .key_offset = offsetof(struct nft_chain, name),
+ .hashfn = nft_chain_hash,
+ .obj_hashfn = nft_chain_hash_obj,
+ .obj_cmpfn = nft_chain_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params nft_objname_ht_params = {
+ .head_offset = offsetof(struct nft_object, rhlhead),
+ .key_offset = offsetof(struct nft_object, key),
+ .hashfn = nft_objname_hash,
+ .obj_hashfn = nft_objname_hash_obj,
+ .obj_cmpfn = nft_objname_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+struct nft_audit_data {
+ struct nft_table *table;
+ int entries;
+ int op;
+ struct list_head list;
+};
+
+static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
+ [NFT_MSG_NEWTABLE] = AUDIT_NFT_OP_TABLE_REGISTER,
+ [NFT_MSG_GETTABLE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELTABLE] = AUDIT_NFT_OP_TABLE_UNREGISTER,
+ [NFT_MSG_NEWCHAIN] = AUDIT_NFT_OP_CHAIN_REGISTER,
+ [NFT_MSG_GETCHAIN] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELCHAIN] = AUDIT_NFT_OP_CHAIN_UNREGISTER,
+ [NFT_MSG_NEWRULE] = AUDIT_NFT_OP_RULE_REGISTER,
+ [NFT_MSG_GETRULE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELRULE] = AUDIT_NFT_OP_RULE_UNREGISTER,
+ [NFT_MSG_NEWSET] = AUDIT_NFT_OP_SET_REGISTER,
+ [NFT_MSG_GETSET] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELSET] = AUDIT_NFT_OP_SET_UNREGISTER,
+ [NFT_MSG_NEWSETELEM] = AUDIT_NFT_OP_SETELEM_REGISTER,
+ [NFT_MSG_GETSETELEM] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELSETELEM] = AUDIT_NFT_OP_SETELEM_UNREGISTER,
+ [NFT_MSG_NEWGEN] = AUDIT_NFT_OP_GEN_REGISTER,
+ [NFT_MSG_GETGEN] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_TRACE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_NEWOBJ] = AUDIT_NFT_OP_OBJ_REGISTER,
+ [NFT_MSG_GETOBJ] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELOBJ] = AUDIT_NFT_OP_OBJ_UNREGISTER,
+ [NFT_MSG_GETOBJ_RESET] = AUDIT_NFT_OP_OBJ_RESET,
+ [NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+ [NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+};
+
+static void nft_validate_state_update(struct net *net, u8 new_validate_state)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ switch (nft_net->validate_state) {
+ case NFT_VALIDATE_SKIP:
+ WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);
+ break;
+ case NFT_VALIDATE_NEED:
+ break;
+ case NFT_VALIDATE_DO:
+ if (new_validate_state == NFT_VALIDATE_NEED)
+ return;
+ }
+
+ nft_net->validate_state = new_validate_state;
+}
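+
+/* The per-transaction validation state ratchets upward:
+ * NFT_VALIDATE_SKIP -> NFT_VALIDATE_NEED -> NFT_VALIDATE_DO. A state
+ * of DO is never demoted back to NEED, and jumping straight from SKIP
+ * to DO trips the WARN_ON_ONCE above.
+ */
+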
+static void nf_tables_trans_destroy_work(struct work_struct *w);
+static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ u8 family,
+ struct nft_table *table,
+ struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = net;
+ ctx->family = family;
+ ctx->level = 0;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+ ctx->portid = NETLINK_CB(skb).portid;
+ ctx->report = nlmsg_report(nlh);
+ ctx->flags = nlh->nlmsg_flags;
+ ctx->seq = nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
+ int msg_type, u32 size, gfp_t gfp)
+{
+ struct nft_trans *trans;
+
+ trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
+ if (trans == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+ return trans;
+}
+
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+ int msg_type, u32 size)
+{
+ return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
+}
+
+static void nft_trans_list_del(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
+ kfree(trans);
+}
+
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_set_is_anonymous(set))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set(trans) == set)
+ nft_trans_set_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans) == set)
+ nft_trans_elem_set_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain, bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_chain_binding(chain))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain(trans) == chain)
+ nft_trans_chain_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWRULE:
+ if (trans->ctx.chain == chain)
+ nft_trans_rule_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, true);
+}
+
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ if (!nft_chain_binding(chain))
+ return 0;
+
+ if (nft_chain_binding(ctx->chain))
+ return -EOPNOTSUPP;
+
+ if (chain->bound)
+ return -EBUSY;
+
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
+ chain->bound = true;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+}
+
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, false);
+}
+
+static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err, j;
+
+ j = 0;
+ list_for_each_entry(hook, hook_list, list) {
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_register;
+
+ j++;
+ }
+ return 0;
+
+err_register:
+ list_for_each_entry(hook, hook_list, list) {
+ if (j-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ }
+ return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+ struct list_head *hook_list,
+ bool release_netdev)
+{
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
+}
+
+static int nf_tables_register_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
+{
+ struct nft_base_chain *basechain;
+ const struct nf_hook_ops *ops;
+
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !nft_is_base_chain(chain))
+ return 0;
+
+ basechain = nft_base_chain(chain);
+ ops = &basechain->ops;
+
+ if (basechain->type->ops_register)
+ return basechain->type->ops_register(net, ops);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void __nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain,
+ bool release_netdev)
+{
+ struct nft_base_chain *basechain;
+ const struct nf_hook_ops *ops;
+
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !nft_is_base_chain(chain))
+ return;
+ basechain = nft_base_chain(chain);
+ ops = &basechain->ops;
+
+ if (basechain->type->ops_unregister)
+ return basechain->type->ops_unregister(net, ops);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ nft_netdev_unregister_hooks(net, &basechain->hook_list,
+ release_netdev);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+}
+
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
+{
+ return __nf_tables_unregister_hook(net, table, chain, false);
+}
+
+static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
+ list_add_tail(&trans->list, &nft_net->commit_list);
+}
+
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ nft_activate_next(ctx->net, ctx->table);
+
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+ return 0;
+}
+
+static int nft_deltable(struct nft_ctx *ctx)
+{
+ int err;
+
+ err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, ctx->table);
+ return err;
+}
+
+static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ if (msg_type == NFT_MSG_NEWCHAIN) {
+ nft_activate_next(ctx->net, ctx->chain);
+
+ if (ctx->nla[NFTA_CHAIN_ID]) {
+ nft_trans_chain_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
+ }
+ }
+ nft_trans_chain(trans) = ctx->chain;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return trans;
+}
+
+static int nft_delchain(struct nft_ctx *ctx)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ nft_use_dec(&ctx->table->use);
+ nft_deactivate_next(ctx->net, ctx->chain);
+
+ return 0;
+}
+
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ struct nft_expr *expr;
+
+ expr = nft_expr_first(rule);
+ while (nft_expr_more(rule, expr)) {
+ if (expr->ops->activate)
+ expr->ops->activate(ctx, expr);
+
+ expr = nft_expr_next(expr);
+ }
+}
+
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase)
+{
+ struct nft_expr *expr;
+
+ expr = nft_expr_first(rule);
+ while (nft_expr_more(rule, expr)) {
+ if (expr->ops->deactivate)
+ expr->ops->deactivate(ctx, expr, phase);
+
+ expr = nft_expr_next(expr);
+ }
+}
+
+static int
+nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ /* You cannot delete the same rule twice */
+ if (nft_is_active_next(ctx->net, rule)) {
+ nft_deactivate_next(ctx->net, rule);
+ nft_use_dec(&ctx->chain->use);
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
+
+ if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) {
+ nft_trans_rule_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
+ }
+ nft_trans_rule(trans) = rule;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return trans;
+}
+
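+/* Queue deletion of @rule: deactivate it in the next generation and, if
+ * the chain is hardware offloaded, attach the flow rule needed to remove
+ * the entry from the device at commit time.
+ */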
+static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ struct nft_flow_rule *flow;
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(ctx->net, rule);
+ if (IS_ERR(flow)) {
+ nft_trans_destroy(trans);
+ return PTR_ERR(flow);
+ }
+
+ nft_trans_flow_rule(trans) = flow;
+ }
+
+ err = nf_tables_delrule_deactivate(ctx, rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
+ return err;
+ }
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
+
+ return 0;
+}
+
+static int nft_delrule_by_chain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule;
+ int err;
+
+ list_for_each_entry(rule, &ctx->chain->rules, list) {
+ if (!nft_is_active_next(ctx->net, rule))
+ continue;
+
+ err = nft_delrule(ctx, rule);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
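+/* With a NULL @desc this queues a new set, recording the user-supplied
+ * set ID and activating the set in the next generation; a non-NULL @desc
+ * marks the transaction as an update of timeout and GC interval.
+ */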
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set,
+ const struct nft_set_desc *desc)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ nft_activate_next(ctx->net, set);
+ }
+ nft_trans_set(trans) = set;
+ if (desc) {
+ nft_trans_set_update(trans) = true;
+ nft_trans_set_gc_int(trans) = desc->gc_int;
+ nft_trans_set_timeout(trans) = desc->timeout;
+ }
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+}
+
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+ break;
+ }
+}
+
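+/* Walk all active elements of a map, including the catch-all element, and
+ * deactivate the references that their data part holds on other objects.
+ */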
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_deactivate(ctx, set);
+}
+
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ int err;
+
+ err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ nft_use_dec(&ctx->table->use);
+
+ return err;
+}
+
+static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_object *obj)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWOBJ)
+ nft_activate_next(ctx->net, obj);
+
+ nft_trans_obj(trans) = obj;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+}
+
+static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
+{
+ int err;
+
+ err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, obj);
+ nft_use_dec(&ctx->table->use);
+
+ return err;
+}
+
+static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type,
+ sizeof(struct nft_trans_flowtable));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWFLOWTABLE)
+ nft_activate_next(ctx->net, flowtable);
+
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ nft_trans_flowtable(trans) = flowtable;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+}
+
+static int nft_delflowtable(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable)
+{
+ int err;
+
+ err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, flowtable);
+ nft_use_dec(&ctx->table->use);
+
+ return err;
+}
+
+static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
+{
+ int i;
+
+ for (i = track->regs[dreg].num_reg; i > 0; i--)
+ __nft_reg_track_cancel(track, dreg - i);
+}
+
+static void __nft_reg_track_update(struct nft_regs_track *track,
+ const struct nft_expr *expr,
+ u8 dreg, u8 num_reg)
+{
+ track->regs[dreg].selector = expr;
+ track->regs[dreg].bitwise = NULL;
+ track->regs[dreg].num_reg = num_reg;
+}
+
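+/* Track that @expr populates @len bytes starting at 32-bit register @dreg.
+ * The store spans DIV_ROUND_UP(len, NFT_REG32_SIZE) registers; previously
+ * tracked contents overlapping any of them are clobbered first.
+ */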
+void nft_reg_track_update(struct nft_regs_track *track,
+ const struct nft_expr *expr, u8 dreg, u8 len)
+{
+ unsigned int regcount;
+ int i;
+
+ __nft_reg_track_clobber(track, dreg);
+
+ regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ __nft_reg_track_update(track, expr, dreg, i);
+}
+EXPORT_SYMBOL_GPL(nft_reg_track_update);
+
+void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len)
+{
+ unsigned int regcount;
+ int i;
+
+ __nft_reg_track_clobber(track, dreg);
+
+ regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ __nft_reg_track_cancel(track, dreg);
+}
+EXPORT_SYMBOL_GPL(nft_reg_track_cancel);
+
+void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg)
+{
+ track->regs[dreg].selector = NULL;
+ track->regs[dreg].bitwise = NULL;
+ track->regs[dreg].num_reg = 0;
+}
+EXPORT_SYMBOL_GPL(__nft_reg_track_cancel);
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct net *net,
+ const struct nlattr *nla,
+ u8 family, u8 genmask, u32 nlpid)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_table *table;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_rcu(table, &nft_net->tables, list,
+ lockdep_is_held(&nft_net->commit_mutex)) {
+ if (!nla_strcmp(nla, table->name) &&
+ table->family == family &&
+ nft_active_genmask(table, genmask)) {
+ if (nft_table_has_owner(table) &&
+ nlpid && table->nlpid != nlpid)
+ return ERR_PTR(-EPERM);
+
+ return table;
+ }
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ int family, u8 genmask, u32 nlpid)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_table *table;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
+ table->family == family &&
+ nft_active_genmask(table, genmask)) {
+ if (nft_table_has_owner(table) &&
+ nlpid && table->nlpid != nlpid)
+ return ERR_PTR(-EPERM);
+
+ return table;
+ }
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+ return ++table->hgenerator;
+}
+
+static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
+
+static const struct nft_chain_type *
+__nft_chain_type_get(u8 family, enum nft_chain_types type)
+{
+ if (family >= NFPROTO_NUMPROTO ||
+ type >= NFT_CHAIN_T_MAX)
+ return NULL;
+
+ return chain_type[family][type];
+}
+
+static const struct nft_chain_type *
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
+{
+ const struct nft_chain_type *type;
+ int i;
+
+ for (i = 0; i < NFT_CHAIN_T_MAX; i++) {
+ type = __nft_chain_type_get(family, i);
+ if (!type)
+ continue;
+ if (!nla_strcmp(nla, type->name))
+ return type;
+ }
+ return NULL;
+}
+
+struct nft_module_request {
+ struct list_head list;
+ char module[MODULE_NAME_LEN];
+ bool done;
+};
+
+#ifdef CONFIG_MODULES
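+/* Defer a module load: the request is queued (and deduplicated) on the
+ * per-netns module list and -EAGAIN is returned, which makes nfnetlink
+ * release the locks, load the pending modules and replay the batch.
+ */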
+__printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
+ ...)
+{
+ char module_name[MODULE_NAME_LEN];
+ struct nftables_pernet *nft_net;
+ struct nft_module_request *req;
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+ va_end(args);
+ if (ret >= MODULE_NAME_LEN)
+ return 0;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry(req, &nft_net->module_list, list) {
+ if (!strcmp(req->module, module_name)) {
+ if (req->done)
+ return 0;
+
+ /* A request to load this module already exists. */
+ return -EAGAIN;
+ }
+ }
+
+ req = kmalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->done = false;
+ strscpy(req->module, module_name, MODULE_NAME_LEN);
+ list_add_tail(&req->list, &nft_net->module_list);
+
+ return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(nft_request_module);
+#endif
+
+static void lockdep_nfnl_nft_mutex_not_held(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ if (debug_locks)
+ WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+#endif
+}
+
+static const struct nft_chain_type *
+nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
+ u8 family, bool autoload)
+{
+ const struct nft_chain_type *type;
+
+ type = __nf_tables_chain_type_lookup(nla, family);
+ if (type != NULL)
+ return type;
+
+ lockdep_nfnl_nft_mutex_not_held();
+#ifdef CONFIG_MODULES
+ if (autoload) {
+ if (nft_request_module(net, "nft-chain-%u-%.*s", family,
+ nla_len(nla),
+ (const char *)nla_data(nla)) == -EAGAIN)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
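+/* Lower 16 bits of the per-netns generation counter, carried in the
+ * nfnetlink resource id so userspace can detect generation changes.
+ */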
+static __be16 nft_base_seq(const struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ return htons(nft_net->base_seq & 0xffff);
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN }
+};
+
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table)
+{
+ struct nlmsghdr *nlh;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+ nla_put_be32(skb, NFTA_TABLE_FLAGS,
+ htonl(table->flags & NFT_TABLE_F_MASK)) ||
+ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+ NFTA_TABLE_PAD))
+ goto nla_put_failure;
+ if (nft_table_has_owner(table) &&
+ nla_put_be32(skb, NFTA_TABLE_OWNER, htonl(table->nlpid)))
+ goto nla_put_failure;
+
+ if (table->udata) {
+ if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata))
+ goto nla_put_failure;
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+struct nftnl_skb_parms {
+ bool report;
+};
+#define NFT_CB(skb)	(*(struct nftnl_skb_parms *)&((skb)->cb))
+
+static void nft_notify_enqueue(struct sk_buff *skb, bool report,
+ struct list_head *notify_list)
+{
+ NFT_CB(skb).report = report;
+ list_add_tail(&skb->list, notify_list);
+}
+
+static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
+{
+ struct nftables_pernet *nft_net;
+ struct sk_buff *skb;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, flags, ctx->family, ctx->table);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_net = nft_pernet(ctx->net);
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static int nf_tables_dump_tables(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nftables_pernet *nft_net;
+ const struct nft_table *table;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, table))
+ continue;
+ if (nf_tables_fill_table_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWTABLE, NLM_F_MULTI,
+ table->family, table) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+done:
+ rcu_read_unlock();
+ cb->args[0] = idx;
+ return skb->len;
+}
+
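+/* netlink_dump_start() may sleep, so leave the RCU read-side critical
+ * section around the call; the module reference pins nf_tables meanwhile.
+ */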
+static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *c)
+{
+ int err;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rcu_read_unlock();
+ err = netlink_dump_start(nlsk, skb, nlh, c);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ return err;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_table *table;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_tables,
+ .module = THIS_MODULE,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
+ return PTR_ERR(table);
+ }
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWTABLE,
+ 0, family, table);
+ if (err < 0)
+ goto err_fill_table_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_table_info:
+ kfree_skb(skb2);
+ return err;
+}
+
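+/* Unregister the base chain hooks of a table. A non-zero @cnt stops after
+ * the first @cnt base chains, letting the enable path unwind exactly the
+ * hooks it managed to register.
+ */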
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
+{
+ struct nft_chain *chain;
+ u32 i = 0;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ if (cnt && i++ == cnt)
+ break;
+
+ nf_tables_unregister_hook(net, table, chain);
+ }
+}
+
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
+{
+ struct nft_chain *chain;
+ int err, i = 0;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hooks;
+
+ i++;
+ }
+ return 0;
+
+err_register_hooks:
+ if (i)
+ nft_table_disable(net, table, i);
+ return err;
+}
+
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
+{
+ table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_table_disable(net, table, 0);
+ table->flags |= NFT_TABLE_F_DORMANT;
+}
+
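+/* Internal flags live above NFT_TABLE_F_MASK and are never exposed to
+ * userspace; they record the dormant state found at the beginning of the
+ * transaction so that commit and abort can act on the actual transition.
+ */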
+#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
+#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
+#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
+ __NFT_TABLE_F_WAS_AWAKEN)
+
+static int nf_tables_updtable(struct nft_ctx *ctx)
+{
+ struct nft_trans *trans;
+ u32 flags;
+ int ret;
+
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
+
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_MASK)
+ return -EOPNOTSUPP;
+
+ if (flags == ctx->table->flags)
+ return 0;
+
+ if ((nft_table_has_owner(ctx->table) &&
+ !(flags & NFT_TABLE_F_OWNER)) ||
+ (!nft_table_has_owner(ctx->table) &&
+ flags & NFT_TABLE_F_OWNER))
+ return -EOPNOTSUPP;
+
+	/* No dormant off/on/off/on games in a single transaction */
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
+ ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
+ if (ret < 0)
+ goto err_register_hooks;
+
+ ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
+ }
+ }
+
+ nft_trans_table_update(trans) = true;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+
+err_register_hooks:
+ nft_trans_destroy(trans);
+ return ret;
+}
+
+static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
+{
+ const char *name = data;
+
+ return jhash(name, strlen(name), seed);
+}
+
+static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_chain *chain = data;
+
+ return nft_chain_hash(chain->name, 0, seed);
+}
+
+static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_chain *chain = ptr;
+ const char *name = arg->key;
+
+ return strcmp(chain->name, name);
+}
+
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
+{
+ const struct nft_object_hash_key *k = data;
+
+ seed ^= hash_ptr(k->table, 32);
+
+ return jhash(k->name, strlen(k->name), seed);
+}
+
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_object *obj = data;
+
+ return nft_objname_hash(&obj->key, 0, seed);
+}
+
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_object_hash_key *k = arg->key;
+ const struct nft_object *obj = ptr;
+
+ if (obj->key.table != k->table)
+ return -1;
+
+ return strcmp(obj->key.name, k->name);
+}
+
+static bool nft_supported_family(u8 family)
+{
+ return false
+#ifdef CONFIG_NF_TABLES_INET
+ || family == NFPROTO_INET
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ || family == NFPROTO_IPV4
+#endif
+#ifdef CONFIG_NF_TABLES_ARP
+ || family == NFPROTO_ARP
+#endif
+#ifdef CONFIG_NF_TABLES_NETDEV
+ || family == NFPROTO_NETDEV
+#endif
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
+ || family == NFPROTO_BRIDGE
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ || family == NFPROTO_IPV6
+#endif
+ ;
+}
+
+static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+ u32 flags = 0;
+ int err;
+
+ if (!nft_supported_family(family))
+ return -EOPNOTSUPP;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+ attr = nla[NFTA_TABLE_NAME];
+ table = nft_table_lookup(net, attr, family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ if (PTR_ERR(table) != -ENOENT)
+ return PTR_ERR(table);
+ } else {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nf_tables_updtable(&ctx);
+ }
+
+ if (nla[NFTA_TABLE_FLAGS]) {
+ flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_MASK)
+ return -EOPNOTSUPP;
+ }
+
+ err = -ENOMEM;
+ table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
+ if (table == NULL)
+ goto err_kzalloc;
+
+ table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
+ if (table->name == NULL)
+ goto err_strdup;
+
+ if (nla[NFTA_TABLE_USERDATA]) {
+ table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
+ if (table->udata == NULL)
+ goto err_table_udata;
+
+ table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]);
+ }
+
+ err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
+ if (err)
+ goto err_chain_ht;
+
+ INIT_LIST_HEAD(&table->chains);
+ INIT_LIST_HEAD(&table->sets);
+ INIT_LIST_HEAD(&table->objects);
+ INIT_LIST_HEAD(&table->flowtables);
+ table->family = family;
+ table->flags = flags;
+ table->handle = ++nft_net->table_handle;
+ if (table->flags & NFT_TABLE_F_OWNER)
+ table->nlpid = NETLINK_CB(skb).portid;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0)
+ goto err_trans;
+
+ list_add_tail_rcu(&table->list, &nft_net->tables);
+ return 0;
+err_trans:
+ rhltable_destroy(&table->chains_ht);
+err_chain_ht:
+ kfree(table->udata);
+err_table_udata:
+ kfree(table->name);
+err_strdup:
+ kfree(table);
+err_kzalloc:
+ return err;
+}
+
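+/* Flush a table in dependency order: rules, then sets, flowtables,
+ * objects and chains, finally the table itself. Bindable chains and
+ * anonymous sets are skipped; their lifetime follows the owning rule.
+ */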
+static int nft_flush_table(struct nft_ctx *ctx)
+{
+ struct nft_flowtable *flowtable, *nft;
+ struct nft_chain *chain, *nc;
+ struct nft_object *obj, *ne;
+ struct nft_set *set, *ns;
+ int err;
+
+ list_for_each_entry(chain, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
+ if (nft_chain_binding(chain))
+ continue;
+
+ ctx->chain = chain;
+
+ err = nft_delrule_by_chain(ctx);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
+
+ if (nft_set_is_anonymous(set))
+ continue;
+
+ err = nft_delset(ctx, set);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
+ if (!nft_is_active_next(ctx->net, flowtable))
+ continue;
+
+ err = nft_delflowtable(ctx, flowtable);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
+ if (!nft_is_active_next(ctx->net, obj))
+ continue;
+
+ err = nft_delobj(ctx, obj);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
+ if (nft_chain_binding(chain))
+ continue;
+
+ ctx->chain = chain;
+
+ err = nft_delchain(ctx);
+ if (err < 0)
+ goto out;
+ }
+
+ err = nft_deltable(ctx);
+out:
+ return err;
+}
+
+static int nft_flush(struct nft_ctx *ctx, int family)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table, *nt;
+ int err = 0;
+
+ list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
+ if (family != AF_UNSPEC && table->family != family)
+ continue;
+
+ ctx->family = table->family;
+
+ if (!nft_is_active_next(ctx->net, table))
+ continue;
+
+ if (nft_table_has_owner(table) && table->nlpid != ctx->portid)
+ continue;
+
+ if (nla[NFTA_TABLE_NAME] &&
+ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+ continue;
+
+ ctx->table = table;
+
+ err = nft_flush_table(ctx);
+ if (err < 0)
+ goto out;
+ }
+out:
+ return err;
+}
+
+static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, 0, NULL, NULL, nla);
+ if (family == AF_UNSPEC ||
+ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
+ return nft_flush(&ctx, family);
+
+ if (nla[NFTA_TABLE_HANDLE]) {
+ attr = nla[NFTA_TABLE_HANDLE];
+ table = nft_table_lookup_byhandle(net, attr, family, genmask,
+ NETLINK_CB(skb).portid);
+ } else {
+ attr = nla[NFTA_TABLE_NAME];
+ table = nft_table_lookup(net, attr, family, genmask,
+ NETLINK_CB(skb).portid);
+ }
+
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(table);
+ }
+
+ if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
+ table->use > 0)
+ return -EBUSY;
+
+ ctx.family = family;
+ ctx.table = table;
+
+ return nft_flush_table(&ctx);
+}
+
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ if (WARN_ON(ctx->table->use > 0))
+ return;
+
+ rhltable_destroy(&ctx->table->chains_ht);
+ kfree(ctx->table->name);
+ kfree(ctx->table->udata);
+ kfree(ctx->table);
+}
+
+void nft_register_chain_type(const struct nft_chain_type *ctype)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return;
+ }
+ chain_type[ctype->family][ctype->type] = ctype;
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_register_chain_type);
+
+void nft_unregister_chain_type(const struct nft_chain_type *ctype)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ chain_type[ctype->family][ctype->type] = NULL;
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
+{
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (chain->handle == handle &&
+ nft_active_genmask(chain, genmask))
+ return chain;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static bool lockdep_commit_lock_is_held(const struct net *net)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ return lockdep_is_held(&nft_net->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_chain *nft_chain_lookup(struct net *net,
+ struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ char search[NFT_CHAIN_MAXNAMELEN + 1];
+ struct rhlist_head *tmp, *list;
+ struct nft_chain *chain;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ nla_strscpy(search, nla, sizeof(search));
+
+ WARN_ON(!rcu_read_lock_held() &&
+ !lockdep_commit_lock_is_held(net));
+
+ chain = ERR_PTR(-ENOENT);
+ rcu_read_lock();
+ list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
+ if (!list)
+ goto out_unlock;
+
+ rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
+ if (nft_active_genmask(chain, genmask))
+ goto out_unlock;
+ }
+ chain = ERR_PTR(-ENOENT);
+out_unlock:
+ rcu_read_unlock();
+ return chain;
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+ [NFTA_CHAIN_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
+ [NFTA_CHAIN_NAME] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
+ [NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING,
+ .len = NFT_MODULE_AUTOLOAD_LIMIT },
+ [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
+ [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 },
+ [NFTA_CHAIN_ID] = { .type = NLA_U32 },
+ [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+ [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
+ [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_HOOK_DEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
+};
+
+static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
+{
+ struct nft_stats *cpu_stats, total;
+ struct nlattr *nest;
+ unsigned int seq;
+ u64 pkts, bytes;
+ int cpu;
+
+ if (!stats)
+ return 0;
+
+ memset(&total, 0, sizeof(total));
+ for_each_possible_cpu(cpu) {
+ cpu_stats = per_cpu_ptr(stats, cpu);
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ pkts = cpu_stats->pkts;
+ bytes = cpu_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+ total.pkts += pkts;
+ total.bytes += bytes;
+ }
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts),
+ NFTA_COUNTER_PAD) ||
+ nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
+ NFTA_COUNTER_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+ const struct nft_base_chain *basechain)
+{
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_hook *hook, *first = NULL;
+ struct nlattr *nest, *nest_devs;
+ int n = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+
+ if (nft_base_chain_netdev(family, ops->hooknum)) {
+		nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+		if (nest_devs == NULL)
+			goto nla_put_failure;
+
+		list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (!first)
+ first = hook;
+
+ if (nla_put_string(skb, NFTA_DEVICE_NAME,
+ hook->ops.dev->name))
+ goto nla_put_failure;
+ n++;
+ }
+ nla_nest_end(skb, nest_devs);
+
+ if (n == 1 &&
+ nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+nla_put_failure:
+ return -1;
+}
+
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
+ const struct nft_chain *chain)
+{
+ struct nlmsghdr *nlh;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
+ NFTA_CHAIN_PAD))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+ goto nla_put_failure;
+
+ if (nft_is_base_chain(chain)) {
+ const struct nft_base_chain *basechain = nft_base_chain(chain);
+ struct nft_stats __percpu *stats;
+
+ if (nft_dump_basechain_hook(skb, family, basechain))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
+ htonl(basechain->policy)))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
+ goto nla_put_failure;
+
+ stats = rcu_dereference_check(basechain->stats,
+ lockdep_commit_lock_is_held(net));
+ if (nft_dump_stats(skb, stats))
+ goto nla_put_failure;
+ }
+
+ if (chain->flags &&
+ nla_put_be32(skb, NFTA_CHAIN_FLAGS, htonl(chain->flags)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
+ goto nla_put_failure;
+
+ if (chain->udata &&
+ nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
+{
+ struct nftables_pernet *nft_net;
+ struct sk_buff *skb;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, flags, ctx->family, ctx->table,
+ ctx->chain);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_net = nft_pernet(ctx->net);
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static int nf_tables_dump_chains(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, chain))
+ continue;
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWCHAIN,
+ NLM_F_MULTI,
+ table->family, table,
+ chain) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+done:
+ rcu_read_unlock();
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_chain *chain;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct sk_buff *skb2;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_chains,
+ .module = THIS_MODULE,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
+ return PTR_ERR(chain);
+ }
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN,
+ 0, family, table, chain);
+ if (err < 0)
+ goto err_fill_chain_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_chain_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+ [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
+ [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
+};
+
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
+{
+	struct nlattr *tb[NFTA_COUNTER_MAX + 1];
+ struct nft_stats __percpu *newstats;
+ struct nft_stats *stats;
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
+ nft_counter_policy, NULL);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
+ return ERR_PTR(-EINVAL);
+
+ newstats = netdev_alloc_pcpu_stats(struct nft_stats);
+ if (newstats == NULL)
+ return ERR_PTR(-ENOMEM);
+
+	/* Restoring the old counter values on whichever cpu this runs on is
+	 * fine: per-cpu statistics are never exposed to userspace, only
+	 * their sum is.
+	 */
+ preempt_disable();
+ stats = this_cpu_ptr(newstats);
+ stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ preempt_enable();
+
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_trans *trans)
+{
+ struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
+
+ if (!nft_trans_chain_stats(trans))
+ return;
+
+ nft_trans_chain_stats(trans) =
+ rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
+ lockdep_commit_lock_is_held(trans->ctx.net));
+
+ if (!nft_trans_chain_stats(trans))
+ static_branch_inc(&nft_counters_enabled);
+}
+
+static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
+{
+ struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0);
+ struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1);
+
+ if (g0 != g1)
+ kvfree(g1);
+ kvfree(g0);
+
+ /* should be NULL either via abort or via successful commit */
+ WARN_ON_ONCE(chain->blob_next);
+ kvfree(chain->blob_next);
+}
+
+void nf_tables_chain_destroy(struct nft_ctx *ctx)
+{
+ struct nft_chain *chain = ctx->chain;
+ struct nft_hook *hook, *next;
+
+ if (WARN_ON(chain->use > 0))
+ return;
+
+ /* no concurrent access possible anymore */
+ nf_tables_chain_free_chain_rules(chain);
+
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) {
+ list_for_each_entry_safe(hook, next,
+ &basechain->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
+ module_put(basechain->type->owner);
+ if (rcu_access_pointer(basechain->stats)) {
+ static_branch_dec(&nft_counters_enabled);
+ free_percpu(rcu_dereference_raw(basechain->stats));
+ }
+ kfree(chain->name);
+ kfree(chain->udata);
+ kfree(basechain);
+ } else {
+ kfree(chain->name);
+ kfree(chain->udata);
+ kfree(chain);
+ }
+}
+
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ const struct nlattr *attr)
+{
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ struct nft_hook *hook;
+ int err;
+
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ if (!hook) {
+ err = -ENOMEM;
+ goto err_hook_alloc;
+ }
+
+ nla_strscpy(ifname, attr, IFNAMSIZ);
+	/* nf_tables_netdev_event() is called under the rtnl_mutex; this
+	 * indirectly serializes all other holders of the commit_mutex
+	 * against the rtnl_mutex.
+	 */
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+
+ return hook;
+
+err_hook_dev:
+ kfree(hook);
+err_hook_alloc:
+ return ERR_PTR(err);
+}
+
+static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
+ const struct nft_hook *this)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list) {
+ if (this->ops.dev == hook->ops.dev)
+ return hook;
+ }
+
+ return NULL;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+ const struct nlattr *attr,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+ const struct nlattr *tmp;
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err_hook;
+ }
+
+ hook = nft_netdev_hook_alloc(net, tmp);
+ if (IS_ERR(hook)) {
+ err = PTR_ERR(hook);
+ goto err_hook;
+ }
+ if (nft_hook_list_find(hook_list, hook)) {
+ kfree(hook);
+ err = -EEXIST;
+ goto err_hook;
+ }
+ list_add_tail(&hook->list, hook_list);
+ n++;
+
+ if (n == NFT_NETDEVICE_MAX) {
+ err = -EFBIG;
+ goto err_hook;
+ }
+ }
+
+ return 0;
+
+err_hook:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ return err;
+}
+
+struct nft_chain_hook {
+ u32 num;
+ s32 priority;
+ const struct nft_chain_type *type;
+ struct list_head list;
+};
+
+static int nft_chain_parse_netdev(struct net *net,
+ struct nlattr *tb[],
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err;
+
+ if (tb[NFTA_HOOK_DEV]) {
+ hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ list_add_tail(&hook->list, hook_list);
+ } else if (tb[NFTA_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+ hook_list);
+ if (err < 0)
+ return err;
+
+ if (list_empty(hook_list))
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
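+/* Parse NFTA_CHAIN_HOOK: hook number and priority are mandatory, the
+ * chain type defaults to NFT_CHAIN_T_DEFAULT and may be autoloaded as a
+ * module, NAT chains must run after conntrack, and device attributes are
+ * only accepted where the base chain attaches to a net device.
+ */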
+static int nft_chain_parse_hook(struct net *net,
+ const struct nlattr * const nla[],
+ struct nft_chain_hook *hook, u8 family,
+ struct netlink_ext_ack *extack, bool autoload)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ const struct nft_chain_type *type;
+ int err;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+ lockdep_nfnl_nft_mutex_not_held();
+
+ err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX,
+ nla[NFTA_CHAIN_HOOK],
+ nft_hook_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+ ha[NFTA_HOOK_PRIORITY] == NULL)
+ return -EINVAL;
+
+ hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+ hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+ type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT);
+ if (!type)
+ return -EOPNOTSUPP;
+
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
+ family, autoload);
+ if (IS_ERR(type)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
+ return PTR_ERR(type);
+ }
+ }
+ if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
+ return -EOPNOTSUPP;
+
+ if (type->type == NFT_CHAIN_T_NAT &&
+ hook->priority <= NF_IP_PRI_CONNTRACK)
+ return -EOPNOTSUPP;
+
+ if (!try_module_get(type->owner)) {
+ if (nla[NFTA_CHAIN_TYPE])
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
+ return -ENOENT;
+ }
+
+ hook->type = type;
+
+ INIT_LIST_HEAD(&hook->list);
+ if (nft_base_chain_netdev(family, hook->num)) {
+ err = nft_chain_parse_netdev(net, ha, &hook->list);
+ if (err < 0) {
+ module_put(type->owner);
+ return err;
+ }
+ } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+ struct nft_hook *h, *next;
+
+ list_for_each_entry_safe(h, next, &hook->list, list) {
+ list_del(&h->list);
+ kfree(h);
+ }
+ module_put(hook->type->owner);
+}
+
+struct nft_rules_old {
+ struct rcu_head h;
+ struct nft_rule_blob *blob;
+};
+
+static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
+{
+ struct nft_rule_dp *prule;
+
+ prule = (struct nft_rule_dp *)ptr;
+ prule->is_last = 1;
+ /* blob size does not include the trailer rule */
+}
+
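+/* Allocate a rule blob with room for @size bytes of rules (including the
+ * trailer rule marked is_last) plus a struct nft_rules_old, used to defer
+ * freeing of the previous blob to an RCU grace period.
+ */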
+static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
+{
+ struct nft_rule_blob *blob;
+
+ /* size must include room for the last rule */
+ if (size < offsetof(struct nft_rule_dp, data))
+ return NULL;
+
+ size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
+ if (size > INT_MAX)
+ return NULL;
+
+ blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return NULL;
+
+ blob->size = 0;
+ nft_last_rule(blob, blob->data);
+
+ return blob;
+}
+
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+ const struct nft_chain_hook *hook,
+ struct nft_chain *chain)
+{
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+ ops->hook_ops_type = NF_HOOK_OP_NF_TABLES;
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ struct nft_chain_hook *hook, u32 flags)
+{
+ struct nft_chain *chain;
+ struct nft_hook *h;
+
+ basechain->type = hook->type;
+ INIT_LIST_HEAD(&basechain->hook_list);
+ chain = &basechain->chain;
+
+ if (nft_base_chain_netdev(family, hook->num)) {
+ list_splice_init(&hook->list, &basechain->hook_list);
+ list_for_each_entry(h, &basechain->hook_list, list)
+ nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+ basechain->ops.hooknum = hook->num;
+ basechain->ops.priority = hook->priority;
+ } else {
+ nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+ }
+
+ chain->flags |= NFT_CHAIN_BASE | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ !nft_chain_offload_support(basechain)) {
+ list_splice_init(&basechain->hook_list, &hook->list);
+ return -EOPNOTSUPP;
+ }
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+}
+
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+{
+ int err;
+
+ err = rhltable_insert_key(&table->chains_ht, chain->name,
+ &chain->rhlhead, nft_chain_ht_params);
+ if (err)
+ return err;
+
+ list_add_tail_rcu(&chain->list, &table->chains);
+
+ return 0;
+}
+
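+/* Global counter used to generate names ("__chain%llu") for bindable
+ * chains created without an explicit NFTA_CHAIN_NAME.
+ */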
+static u64 chain_id;
+
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_base_chain *basechain;
+ struct net *net = ctx->net;
+ char name[NFT_NAME_MAXLEN];
+ struct nft_rule_blob *blob;
+ struct nft_trans *trans;
+ struct nft_chain *chain;
+ unsigned int data_size;
+ int err;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_stats __percpu *stats = NULL;
+ struct nft_chain_hook hook;
+
+ if (flags & NFT_CHAIN_BINDING)
+ return -EOPNOTSUPP;
+
+ err = nft_chain_parse_hook(net, nla, &hook, family, extack,
+ true);
+ if (err < 0)
+ return err;
+
+ basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
+ if (basechain == NULL) {
+ nft_chain_release_hook(&hook);
+ return -ENOMEM;
+ }
+ chain = &basechain->chain;
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats)) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ return PTR_ERR(stats);
+ }
+ rcu_assign_pointer(basechain->stats, stats);
+ }
+
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ free_percpu(stats);
+ return err;
+ }
+ if (stats)
+ static_branch_inc(&nft_counters_enabled);
+ } else {
+ if (flags & NFT_CHAIN_BASE)
+ return -EINVAL;
+ if (flags & NFT_CHAIN_HW_OFFLOAD)
+ return -EOPNOTSUPP;
+
+ chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
+ if (chain == NULL)
+ return -ENOMEM;
+
+ chain->flags = flags;
+ }
+ ctx->chain = chain;
+
+ INIT_LIST_HEAD(&chain->rules);
+ chain->handle = nf_tables_alloc_handle(table);
+ chain->table = table;
+
+ if (nla[NFTA_CHAIN_NAME]) {
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
+ } else {
+ if (!(flags & NFT_CHAIN_BINDING)) {
+ err = -EINVAL;
+ goto err_destroy_chain;
+ }
+
+ snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
+ chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
+ }
+
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err_destroy_chain;
+ }
+
+ if (nla[NFTA_CHAIN_USERDATA]) {
+ chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
+ if (chain->udata == NULL) {
+ err = -ENOMEM;
+ goto err_destroy_chain;
+ }
+ chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
+ }
+
+ data_size = offsetof(struct nft_rule_dp, data); /* last rule */
+ blob = nf_tables_chain_alloc_rules(data_size);
+ if (!blob) {
+ err = -ENOMEM;
+ goto err_destroy_chain;
+ }
+
+ RCU_INIT_POINTER(chain->blob_gen_0, blob);
+ RCU_INIT_POINTER(chain->blob_gen_1, blob);
+
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_destroy_chain;
+
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
+ trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_unregister_hook;
+ }
+
+ nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
+ if (nft_is_base_chain(chain))
+ nft_trans_chain_policy(trans) = policy;
+
+ err = nft_chain_add(table, chain);
+ if (err < 0) {
+ nft_trans_destroy(trans);
+ goto err_unregister_hook;
+ }
+
+ return 0;
+
+err_unregister_hook:
+ nft_use_dec_restore(&table->use);
+err_use:
+ nf_tables_unregister_hook(net, table, chain);
+err_destroy_chain:
+ nf_tables_chain_destroy(ctx);
+
+ return err;
+}
+
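+/* Two hook lists are equal if every device in the second list is present
+ * in the first and both lists have the same length.
+ */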
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+ struct list_head *hook_list2)
+{
+ struct nft_hook *hook;
+ int n = 0, m = 0;
+
+ list_for_each_entry(hook, hook_list2, list) {
+ if (!nft_hook_list_find(hook_list1, hook))
+ return false;
+
+ n++;
+ }
+ list_for_each_entry(hook, hook_list1, list)
+ m++;
+
+ return n == m;
+}
+
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ u32 flags, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+ int err;
+
+ if (chain->flags ^ flags)
+ return -EOPNOTSUPP;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+ err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
+ extack, false);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+
+ if (nft_base_chain_netdev(ctx->family, hook.num)) {
+ if (!nft_hook_list_equal(&basechain->hook_list,
+ &hook.list)) {
+ nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+ } else {
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority) {
+ nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_chain *chain2;
+
+ chain2 = nft_chain_lookup(ctx->net, table,
+ nla[NFTA_CHAIN_NAME], genmask);
+ if (!IS_ERR(chain2)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
+ return -EEXIST;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+ }
+
+ err = -ENOMEM;
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ goto err;
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
+
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *tmp;
+ char *name;
+
+ err = -ENOMEM;
+ name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
+ if (!name)
+ goto err;
+
+ err = -EEXIST;
+ list_for_each_entry(tmp, &nft_net->commit_list, list) {
+ if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
+ tmp->ctx.table == table &&
+ nft_trans_chain_update(tmp) &&
+ nft_trans_chain_name(tmp) &&
+ strcmp(name, nft_trans_chain_name(tmp)) == 0) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
+ kfree(name);
+ goto err;
+ }
+ }
+
+ nft_trans_chain_name(trans) = name;
+ }
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+err:
+ free_percpu(stats);
+ kfree(trans);
+ return err;
+}
+
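+/* Look up a chain by the NFTA_CHAIN_ID assigned to it earlier in the same
+ * transaction batch; only NEWCHAIN transactions still on the commit list
+ * can match.
+ */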
+static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ struct nft_chain *chain = trans->ctx.chain;
+
+ if (trans->msg_type == NFT_MSG_NEWCHAIN &&
+ chain->table == table &&
+ id == nft_trans_chain_id(trans) &&
+ nft_active_genmask(chain, genmask))
+ return chain;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_chain *chain = NULL;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ u8 policy = NF_ACCEPT;
+ struct nft_ctx ctx;
+ u64 handle = 0;
+ u32 flags = 0;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ chain = NULL;
+ attr = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nft_chain_lookup_byhandle(table, handle, genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
+ return PTR_ERR(chain);
+ }
+ attr = nla[NFTA_CHAIN_HANDLE];
+ } else if (nla[NFTA_CHAIN_NAME]) {
+ chain = nft_chain_lookup(net, table, attr, genmask);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(chain);
+ }
+ chain = NULL;
+ }
+ } else if (!nla[NFTA_CHAIN_ID]) {
+ return -EINVAL;
+ }
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (chain != NULL &&
+ !nft_is_base_chain(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
+ return -EOPNOTSUPP;
+ }
+
+ if (chain == NULL &&
+ nla[NFTA_CHAIN_HOOK] == NULL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
+ return -EOPNOTSUPP;
+ }
+
+ policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+ switch (policy) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_FLAGS])
+ flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+ else if (chain)
+ flags = chain->flags;
+
+ if (flags & ~NFT_CHAIN_FLAGS)
+ return -EOPNOTSUPP;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ if (chain != NULL) {
+ if (chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+ }
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ flags |= chain->flags & NFT_CHAIN_BASE;
+ return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
+ extack);
+ }
+
+ return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
+}
+
+static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+ struct nft_ctx ctx;
+ u64 handle;
+ u32 use;
+ int err;
+
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ attr = nla[NFTA_CHAIN_HANDLE];
+ handle = be64_to_cpu(nla_get_be64(attr));
+ chain = nft_chain_lookup_byhandle(table, handle, genmask);
+ } else {
+ attr = nla[NFTA_CHAIN_NAME];
+ chain = nft_chain_lookup(net, table, attr, genmask);
+ }
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(chain);
+ }
+
+ if (nft_chain_binding(chain))
+ return -EOPNOTSUPP;
+
+ if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
+ chain->use > 0)
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ use = chain->use;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+ use--;
+
+ err = nft_delrule(&ctx, rule);
+ if (err < 0)
+ return err;
+ }
+
+ /* There are rules and elements that are still holding references to us,
+ * we cannot do a recursive removal in this case.
+ */
+ if (use > 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EBUSY;
+ }
+
+ return nft_delchain(&ctx);
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ * nft_register_expr - register nf_tables expr type
+ * @type: expr type
+ *
+ * Registers the expr type for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (type->family == NFPROTO_UNSPEC)
+ list_add_tail_rcu(&type->list, &nf_tables_expressions);
+ else
+ list_add_rcu(&type->list, &nf_tables_expressions);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ * nft_unregister_expr - unregister nf_tables expr type
+ * @type: expr type
+ *
+ * Unregisters the expr type for use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
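+
+/*
+ * Illustrative sketch only (not part of this file): the usual pairing of
+ * these two helpers in an expression module. "nft_foo_type",
+ * "nft_foo_ops", "nft_foo_policy" and "NFTA_FOO_MAX" are hypothetical
+ * stand-ins for a real expression's definitions.
+ *
+ *	static struct nft_expr_type nft_foo_type __read_mostly = {
+ *		.name		= "foo",
+ *		.ops		= &nft_foo_ops,
+ *		.policy		= nft_foo_policy,
+ *		.maxattr	= NFTA_FOO_MAX,
+ *		.owner		= THIS_MODULE,
+ *	};
+ *
+ *	static int __init nft_foo_module_init(void)
+ *	{
+ *		return nft_register_expr(&nft_foo_type);
+ *	}
+ *	module_init(nft_foo_module_init);
+ *
+ *	static void __exit nft_foo_module_exit(void)
+ *	{
+ *		nft_unregister_expr(&nft_foo_type);
+ *	}
+ *	module_exit(nft_foo_module_exit);
+ */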
+
+static const struct nft_expr_type *__nft_expr_type_get(u8 family,
+ struct nlattr *nla)
+{
+ const struct nft_expr_type *type, *candidate = NULL;
+
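+	/* A family-specific match always replaces a family-agnostic
+	 * (NFPROTO_UNSPEC) candidate, so specific types take precedence.
+	 */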
+ list_for_each_entry(type, &nf_tables_expressions, list) {
+ if (!nla_strcmp(nla, type->name)) {
+ if (!type->family && !candidate)
+ candidate = type;
+ else if (type->family == family)
+ candidate = type;
+ }
+ }
+ return candidate;
+}
+
+#ifdef CONFIG_MODULES
+static int nft_expr_type_request_module(struct net *net, u8 family,
+ struct nlattr *nla)
+{
+ if (nft_request_module(net, "nft-expr-%u-%.*s", family,
+ nla_len(nla), (char *)nla_data(nla)) == -EAGAIN)
+ return -EAGAIN;
+
+ return 0;
+}
+#endif
+
+static const struct nft_expr_type *nft_expr_type_get(struct net *net,
+ u8 family,
+ struct nlattr *nla)
+{
+ const struct nft_expr_type *type;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ type = __nft_expr_type_get(family, nla);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+ lockdep_nfnl_nft_mutex_not_held();
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ if (nft_expr_type_request_module(net, family, nla) == -EAGAIN)
+ return ERR_PTR(-EAGAIN);
+
+ if (nft_request_module(net, "nft-expr-%.*s",
+ nla_len(nla),
+ (char *)nla_data(nla)) == -EAGAIN)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+ [NFTA_EXPR_NAME] = { .type = NLA_STRING,
+ .len = NFT_MODULE_AUTOLOAD_LIMIT },
+ [NFTA_EXPR_DATA] = { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
+ goto nla_put_failure;
+
+ if (expr->ops->dump) {
+ struct nlattr *data = nla_nest_start_noflag(skb,
+ NFTA_EXPR_DATA);
+ if (data == NULL)
+ goto nla_put_failure;
+ if (expr->ops->dump(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, data);
+ }
+
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
+ const struct nft_expr *expr)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+struct nft_expr_info {
+ const struct nft_expr_ops *ops;
+ const struct nlattr *attr;
+ struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
+};
+
+static int nf_tables_expr_parse(const struct nft_ctx *ctx,
+ const struct nlattr *nla,
+ struct nft_expr_info *info)
+{
+ const struct nft_expr_type *type;
+ const struct nft_expr_ops *ops;
+ struct nlattr *tb[NFTA_EXPR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla,
+ nft_expr_policy, NULL);
+ if (err < 0)
+ return err;
+
+ type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+
+ if (tb[NFTA_EXPR_DATA]) {
+ err = nla_parse_nested_deprecated(info->tb, type->maxattr,
+ tb[NFTA_EXPR_DATA],
+ type->policy, NULL);
+ if (err < 0)
+ goto err1;
+ } else
+ memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
+
+ if (type->select_ops != NULL) {
+ ops = type->select_ops(ctx,
+ (const struct nlattr * const *)info->tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+#ifdef CONFIG_MODULES
+ if (err == -EAGAIN)
+ if (nft_expr_type_request_module(ctx->net,
+ ctx->family,
+ tb[NFTA_EXPR_NAME]) != -EAGAIN)
+ err = -ENOENT;
+#endif
+ goto err1;
+ }
+ } else
+ ops = type->ops;
+
+ info->attr = nla;
+ info->ops = ops;
+
+ return 0;
+
+err1:
+ module_put(type->owner);
+ return err;
+}
+
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+ const struct nft_expr_info *expr_info,
+ struct nft_expr *expr)
+{
+ const struct nft_expr_ops *ops = expr_info->ops;
+ int err;
+
+ expr->ops = ops;
+ if (ops->init) {
+ err = ops->init(ctx, expr, (const struct nlattr **)expr_info->tb);
+ if (err < 0)
+ goto err1;
+ }
+
+ return 0;
+err1:
+ expr->ops = NULL;
+ return err;
+}
+
+static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_expr *expr)
+{
+ const struct nft_expr_type *type = expr->ops->type;
+
+ if (expr->ops->destroy)
+ expr->ops->destroy(ctx, expr);
+ module_put(type->owner);
+}
+
+static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
+ const struct nlattr *nla)
+{
+ struct nft_expr_info expr_info;
+ struct nft_expr *expr;
+ struct module *owner;
+ int err;
+
+ err = nf_tables_expr_parse(ctx, nla, &expr_info);
+ if (err < 0)
+ goto err_expr_parse;
+
+ err = -EOPNOTSUPP;
+ if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err_expr_stateful;
+
+ err = -ENOMEM;
+ expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
+ if (expr == NULL)
+ goto err_expr_stateful;
+
+ err = nf_tables_newexpr(ctx, &expr_info, expr);
+ if (err < 0)
+ goto err_expr_new;
+
+ return expr;
+err_expr_new:
+ kfree(expr);
+err_expr_stateful:
+ owner = expr_info.ops->type->owner;
+ if (expr_info.ops->type->release_ops)
+ expr_info.ops->type->release_ops(expr_info.ops);
+
+ module_put(owner);
+err_expr_parse:
+ return ERR_PTR(err);
+}
+
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+{
+ int err;
+
+ if (src->ops->clone) {
+ dst->ops = src->ops;
+ err = src->ops->clone(dst, src);
+ if (err < 0)
+ return err;
+ } else {
+ memcpy(dst, src, src->ops->size);
+ }
+
+ __module_get(src->ops->type->owner);
+
+ return 0;
+}
+
+void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
+{
+ nf_tables_expr_destroy(ctx, expr);
+ kfree(expr);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+ u64 handle)
+{
+ struct nft_rule *rule;
+
+	// FIXME: linear walk over the rule list, O(n) per handle lookup
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
+ if (handle == rule->handle)
+ return rule;
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+ const struct nlattr *nla)
+{
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+ [NFTA_RULE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_RULE_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_RULE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
+ [NFTA_RULE_POSITION] = { .type = NLA_U64 },
+ [NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
+ [NFTA_RULE_ID] = { .type = NLA_U32 },
+ [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 },
+ [NFTA_RULE_CHAIN_ID] = { .type = NLA_U32 },
+};
+
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nft_rule *rule, u64 handle)
+{
+ struct nlmsghdr *nlh;
+ const struct nft_expr *expr, *next;
+ struct nlattr *list;
+ u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+
+ nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
+ nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle),
+ NFTA_RULE_PAD))
+ goto nla_put_failure;
+
+ if (event != NFT_MSG_DELRULE && handle) {
+ if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle),
+ NFTA_RULE_PAD))
+ goto nla_put_failure;
+ }
+
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_stats(chain, rule);
+
+ list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
+ if (list == NULL)
+ goto nla_put_failure;
+ nft_rule_for_each_expr(expr, next, rule) {
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, list);
+
+ if (rule->udata) {
+ struct nft_userdata *udata = nft_userdata(rule);
+ if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
+ udata->data) < 0)
+ goto nla_put_failure;
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static void nf_tables_rule_notify(const struct nft_ctx *ctx,
+ const struct nft_rule *rule, int event)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_rule *prule;
+ struct sk_buff *skb;
+ u64 handle = 0;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ if (event == NFT_MSG_NEWRULE &&
+ !list_is_first(&rule->list, &ctx->chain->rules) &&
+ !list_is_last(&rule->list, &ctx->chain->rules)) {
+ prule = list_prev_entry(rule, list);
+ handle = prule->handle;
+ }
+ if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE))
+ flags |= NLM_F_APPEND;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, flags, ctx->family, ctx->table,
+ ctx->chain, rule, handle);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+struct nft_rule_dump_ctx {
+ char *table;
+ char *chain;
+};
+
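+/* Dumps may be interrupted when the skb fills up; cb->args[0] records how
+ * many rules were already emitted so that the next invocation can skip
+ * them (s_idx) and resume where the previous one stopped.
+ */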
+static int __nf_tables_dump_rules(struct sk_buff *skb,
+ unsigned int *idx,
+ struct netlink_callback *cb,
+ const struct nft_table *table,
+ const struct nft_chain *chain)
+{
+ struct net *net = sock_net(skb->sk);
+ const struct nft_rule *rule, *prule;
+ unsigned int s_idx = cb->args[0];
+ u64 handle;
+
+ prule = NULL;
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
+ if (!nft_is_active(net, rule))
+ goto cont_skip;
+ if (*idx < s_idx)
+ goto cont;
+ if (prule)
+ handle = prule->handle;
+ else
+ handle = 0;
+
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family,
+ table, chain, rule, handle) < 0)
+ return 1;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ prule = rule;
+cont_skip:
+ (*idx)++;
+ }
+ return 0;
+}
+
+static int nf_tables_dump_rules(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_rule_dump_ctx *ctx = cb->data;
+ struct nft_table *table;
+ const struct nft_chain *chain;
+ unsigned int idx = 0;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+ continue;
+
+ if (ctx && ctx->table && ctx->chain) {
+ struct rhlist_head *list, *tmp;
+
+ list = rhltable_lookup(&table->chains_ht, ctx->chain,
+ nft_chain_ht_params);
+ if (!list)
+ goto done;
+
+ rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
+ if (!nft_is_active(net, chain))
+ continue;
+ __nf_tables_dump_rules(skb, &idx,
+ cb, table, chain);
+ break;
+ }
+ goto done;
+ }
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (__nf_tables_dump_rules(skb, &idx, cb, table, chain))
+ goto done;
+ }
+
+ if (ctx && ctx->table)
+ break;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_rules_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_rule_dump_ctx *ctx = NULL;
+
+ if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_ATOMIC);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_ATOMIC);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ cb->data = ctx;
+ return 0;
+}
+
+static int nf_tables_dump_rules_done(struct netlink_callback *cb)
+{
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
+ return 0;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct sk_buff *skb2;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+			.start = nf_tables_dump_rules_start,
+ .dump = nf_tables_dump_rules,
+ .done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+ return PTR_ERR(chain);
+ }
+
+ rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ if (IS_ERR(rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
+ return PTR_ERR(rule);
+ }
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+ family, table, chain, rule, 0);
+ if (err < 0)
+ goto err_fill_rule_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_rule_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ struct nft_expr *expr, *next;
+
+ /*
+ * Careful: some expressions might not be initialized in case this
+ * is called on error from nf_tables_newrule().
+ */
+ expr = nft_expr_first(rule);
+ while (nft_expr_more(rule, expr)) {
+ next = nft_expr_next(expr);
+ nf_tables_expr_destroy(ctx, expr);
+ expr = next;
+ }
+ kfree(rule);
+}
+
+static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
+ nf_tables_rule_destroy(ctx, rule);
+}
+
+int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+{
+ struct nft_expr *expr, *last;
+ const struct nft_data *data;
+ struct nft_rule *rule;
+ int err;
+
+ if (ctx->level == NFT_JUMP_STACK_SIZE)
+ return -EMLINK;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(ctx->net, rule))
+ continue;
+
+ nft_rule_for_each_expr(expr, last, rule) {
+ if (!expr->ops->validate)
+ continue;
+
+ err = expr->ops->validate(ctx, expr, &data);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate);
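+
+/* The ctx->level check above bounds the depth of chain-to-chain jumps:
+ * a ruleset that nests more than NFT_JUMP_STACK_SIZE jump/goto levels is
+ * rejected with -EMLINK before it could overrun the jump stack at
+ * evaluation time.
+ */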
+
+static int nft_table_validate(struct net *net, const struct nft_table *table)
+{
+ struct nft_chain *chain;
+ struct nft_ctx ctx = {
+ .net = net,
+ .family = table->family,
+ };
+ int err;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ ctx.chain = chain;
+ err = nft_chain_validate(&ctx, chain);
+ if (err < 0)
+ return err;
+
+ cond_resched();
+ }
+
+ return 0;
+}
+
+int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
+ const struct nft_data *data;
+ int err;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ pctx->level++;
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ pctx->level--;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ ret = nft_setelem_validate(ctx, set, NULL, &elem);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
+ const struct nlattr *nla);
+
+#define NFT_RULE_MAXEXPRS 128
+
+static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ unsigned int size, i, n, ulen = 0, usize = 0;
+ u8 genmask = nft_genmask_next(info->net);
+ struct nft_rule *rule, *old_rule = NULL;
+ struct nft_expr_info *expr_info = NULL;
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_flow_rule *flow = NULL;
+ struct net *net = info->net;
+ struct nft_userdata *udata;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_trans *trans;
+ u64 handle, pos_handle;
+ struct nft_expr *expr;
+ struct nft_ctx ctx;
+ struct nlattr *tmp;
+ int err, rem;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ if (nla[NFTA_RULE_CHAIN]) {
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+ genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+ return PTR_ERR(chain);
+ }
+
+ } else if (nla[NFTA_RULE_CHAIN_ID]) {
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+ genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
+ return PTR_ERR(chain);
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
+ if (nla[NFTA_RULE_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+ rule = __nft_rule_lookup(chain, handle);
+ if (IS_ERR(rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
+ return PTR_ERR(rule);
+ }
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
+ return -EEXIST;
+ }
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ old_rule = rule;
+ else
+ return -EOPNOTSUPP;
+ } else {
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE) ||
+ info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EINVAL;
+ handle = nf_tables_alloc_handle(table);
+
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+ return PTR_ERR(old_rule);
+ }
+ } else if (nla[NFTA_RULE_POSITION_ID]) {
+ old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
+ return PTR_ERR(old_rule);
+ }
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ n = 0;
+ size = 0;
+ if (nla[NFTA_RULE_EXPRESSIONS]) {
+ expr_info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+ sizeof(struct nft_expr_info),
+ GFP_KERNEL);
+ if (!expr_info)
+ return -ENOMEM;
+
+ nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+ err = -EINVAL;
+ if (nla_type(tmp) != NFTA_LIST_ELEM)
+ goto err_release_expr;
+ if (n == NFT_RULE_MAXEXPRS)
+ goto err_release_expr;
+ err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, tmp);
+ goto err_release_expr;
+ }
+ size += expr_info[n].ops->size;
+ n++;
+ }
+ }
+	/* Check for overflow of the 12-bit dlen bitfield */
+ err = -EFBIG;
+ if (size >= 1 << 12)
+ goto err_release_expr;
+
+ if (nla[NFTA_RULE_USERDATA]) {
+ ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+ if (ulen > 0)
+ usize = sizeof(struct nft_userdata) + ulen;
+ }
+
+ err = -ENOMEM;
+ rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
+ if (rule == NULL)
+ goto err_release_expr;
+
+ nft_activate_next(net, rule);
+
+ rule->handle = handle;
+ rule->dlen = size;
+ rule->udata = ulen ? 1 : 0;
+
+ if (ulen) {
+ udata = nft_userdata(rule);
+ udata->len = ulen - 1;
+ nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
+ }
+
+ expr = nft_expr_first(rule);
+ for (i = 0; i < n; i++) {
+ err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, expr_info[i].attr);
+ goto err_release_rule;
+ }
+
+ if (expr_info[i].ops->validate)
+ nft_validate_state_update(net, NFT_VALIDATE_NEED);
+
+ expr_info[i].ops = NULL;
+ expr = nft_expr_next(expr);
+ }
+
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(net, rule);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto err_release_rule;
+ }
+ }
+
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err_release_rule;
+ }
+
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_chain_binding(chain)) {
+ err = -EOPNOTSUPP;
+ goto err_destroy_flow_rule;
+ }
+
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+ goto err_destroy_flow_rule;
+
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
+ if (trans == NULL) {
+ err = -ENOMEM;
+ goto err_destroy_flow_rule;
+ }
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ } else {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
+ if (!trans) {
+ err = -ENOMEM;
+ goto err_destroy_flow_rule;
+ }
+
+ if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
+ if (old_rule)
+ list_add_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_tail_rcu(&rule->list, &chain->rules);
+ } else {
+ if (old_rule)
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_rcu(&rule->list, &chain->rules);
+ }
+ }
+ kvfree(expr_info);
+
+ if (flow)
+ nft_trans_flow_rule(trans) = flow;
+
+ if (nft_net->validate_state == NFT_VALIDATE_DO)
+ return nft_table_validate(net, table);
+
+ return 0;
+
+err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
+ if (flow)
+ nft_flow_rule_destroy(flow);
+err_release_rule:
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
+ nf_tables_rule_destroy(&ctx, rule);
+err_release_expr:
+ for (i = 0; i < n; i++) {
+ if (expr_info[i].ops) {
+ module_put(expr_info[i].ops->type->owner);
+ if (expr_info[i].ops->type->release_ops)
+ expr_info[i].ops->type->release_ops(expr_info[i].ops);
+ }
+ }
+ kvfree(expr_info);
+
+ return err;
+}
+
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
+ const struct nlattr *nla)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWRULE &&
+ trans->ctx.chain == chain &&
+ id == nft_trans_rule_id(trans))
+ return nft_trans_rule(trans);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_chain *chain = NULL;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct nft_rule *rule;
+ struct nft_ctx ctx;
+ int err = 0;
+
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ if (nla[NFTA_RULE_CHAIN]) {
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+ genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+ return PTR_ERR(chain);
+ }
+ if (nft_chain_binding(chain))
+ return -EOPNOTSUPP;
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ if (chain) {
+ if (nla[NFTA_RULE_HANDLE]) {
+ rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ if (IS_ERR(rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
+ return PTR_ERR(rule);
+ }
+
+ err = nft_delrule(&ctx, rule);
+ } else if (nla[NFTA_RULE_ID]) {
+ rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]);
+ if (IS_ERR(rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
+ return PTR_ERR(rule);
+ }
+
+ err = nft_delrule(&ctx, rule);
+ } else {
+ err = nft_delrule_by_chain(&ctx);
+ }
+ } else {
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+ if (nft_chain_binding(chain))
+ continue;
+
+ ctx.chain = chain;
+ err = nft_delrule_by_chain(&ctx);
+ if (err < 0)
+ break;
+ }
+ }
+
+ return err;
+}
+
+/*
+ * Sets
+ */
+static const struct nft_set_type *nft_set_types[] = {
+ &nft_set_hash_fast_type,
+ &nft_set_hash_type,
+ &nft_set_rhash_type,
+ &nft_set_bitmap_type,
+ &nft_set_rbtree_type,
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ &nft_set_pipapo_avx2_type,
+#endif
+ &nft_set_pipapo_type,
+};
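+
+/* The array order is a preference order: the selection loop below keeps
+ * the earlier backend when two candidates produce equal estimates, which
+ * is why the AVX2 pipapo variant is tried before the generic one.
+ */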
+
+#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \
+ NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
+ NFT_SET_EVAL)
+
+static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
+{
+ return (flags & type->features) == (flags & NFT_SET_FEATURES);
+}
+
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given; in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_set_ops *ops, *bops;
+ struct nft_set_estimate est, best;
+ const struct nft_set_type *type;
+ u32 flags = 0;
+ int i;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+ lockdep_nfnl_nft_mutex_not_held();
+
+ if (nla[NFTA_SET_FLAGS] != NULL)
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+
+ bops = NULL;
+ best.size = ~0;
+ best.lookup = ~0;
+ best.space = ~0;
+
+ for (i = 0; i < ARRAY_SIZE(nft_set_types); i++) {
+ type = nft_set_types[i];
+ ops = &type->ops;
+
+ if (!nft_set_ops_candidate(type, flags))
+ continue;
+ if (!ops->estimate(desc, flags, &est))
+ continue;
+
+ switch (desc->policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.lookup < best.lookup)
+ break;
+ if (est.lookup == best.lookup &&
+ est.space < best.space)
+ break;
+ continue;
+ case NFT_SET_POL_MEMORY:
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ if (est.space == best.space &&
+ est.lookup < best.lookup)
+ break;
+ } else if (est.size < best.size || !bops) {
+ break;
+ }
+ continue;
+ default:
+ break;
+ }
+
+ bops = ops;
+ best = est;
+ }
+
+ if (bops != NULL)
+ return bops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
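+
+/* For example, with NFT_SET_POL_MEMORY and a known set size, the backend
+ * whose ->estimate() reports the smallest total size wins; under
+ * NFT_SET_POL_PERFORMANCE, lookup complexity is compared first and
+ * per-element space only breaks ties.
+ */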
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+ [NFTA_SET_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_SET_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_POLICY] = { .type = NLA_U32 },
+ [NFTA_SET_DESC] = { .type = NLA_NESTED },
+ [NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
+ [NFTA_SET_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
+ [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_HANDLE] = { .type = NLA_U64 },
+ [NFTA_SET_EXPR] = { .type = NLA_NESTED },
+ [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+ [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
+ [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
+};
+
+static struct nft_set *nft_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_set *set;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (!nla_strcmp(nla, set->name) &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ list_for_each_entry(set, &table->sets, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET) {
+ struct nft_set *set = nft_trans_set(trans);
+
+ if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+struct nft_set *nft_set_lookup_global(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ set = nft_set_lookup(table, nla_set_name, genmask);
+ if (IS_ERR(set)) {
+ if (!nla_set_id)
+ return set;
+
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
+ }
+ return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup_global);
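+
+/* Callers that accept both a set-name attribute and an optional set-id
+ * attribute (e.g. the lookup expression) resolve through this helper:
+ * the name lookup runs first, and the id lookup only matches sets that
+ * were added in the still-uncommitted batch.
+ */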
+
+static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
+ const char *name)
+{
+ const struct nft_set *i;
+ const char *p;
+ unsigned long *inuse;
+ unsigned int n = 0, min = 0;
+
+ p = strchr(name, '%');
+ if (p != NULL) {
+ if (p[1] != 'd' || strchr(p + 2, '%'))
+ return -EINVAL;
+
+ if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+ return -EINVAL;
+
+ inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (inuse == NULL)
+ return -ENOMEM;
+cont:
+ list_for_each_entry(i, &ctx->table->sets, list) {
+ int tmp;
+
+ if (!nft_is_active_next(ctx->net, i))
+ continue;
+ if (!sscanf(i->name, name, &tmp))
+ continue;
+ if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
+ continue;
+
+ set_bit(tmp - min, inuse);
+ }
+
+ n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+ if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+ min += BITS_PER_BYTE * PAGE_SIZE;
+ memset(inuse, 0, PAGE_SIZE);
+ goto cont;
+ }
+ free_page((unsigned long)inuse);
+ }
+
+ set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
+ list_for_each_entry(i, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, i))
+ continue;
+ if (!strcmp(set->name, i->name)) {
+ kfree(set->name);
+ set->name = NULL;
+ return -ENFILE;
+ }
+ }
+ return 0;
+}
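+
+/* Example: anonymous sets are created from a "%d" template (userspace
+ * conventionally passes "__set%d"), so the bitmap scan above hands out
+ * the first free suffix: "__set0", "__set1", and so on per table.
+ */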
+
+int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
+{
+ u64 ms = be64_to_cpu(nla_get_be64(nla));
+	u64 max = U64_MAX;
+
+ max = div_u64(max, NSEC_PER_MSEC);
+ if (ms >= max)
+ return -ERANGE;
+
+ ms *= NSEC_PER_MSEC;
+ *result = nsecs_to_jiffies64(ms);
+ return 0;
+}
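+
+/* Worked example: with HZ=250, 1000 ms becomes 10^9 ns and maps to 250
+ * jiffies; requests at or beyond U64_MAX / NSEC_PER_MSEC milliseconds
+ * would overflow the multiplication and are rejected with -ERANGE.
+ */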
+
+__be64 nf_jiffies64_to_msecs(u64 input)
+{
+ return cpu_to_be64(jiffies64_to_msecs(input));
+}
+
+static int nf_tables_fill_set_concat(struct sk_buff *skb,
+ const struct nft_set *set)
+{
+ struct nlattr *concat, *field;
+ int i;
+
+ concat = nla_nest_start_noflag(skb, NFTA_SET_DESC_CONCAT);
+ if (!concat)
+ return -ENOMEM;
+
+ for (i = 0; i < set->field_count; i++) {
+ field = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
+ if (!field)
+ return -ENOMEM;
+
+ if (nla_put_be32(skb, NFTA_SET_FIELD_LEN,
+ htonl(set->field_len[i])))
+ return -ENOMEM;
+
+ nla_nest_end(skb, field);
+ }
+
+ nla_nest_end(skb, concat);
+
+ return 0;
+}
+
+static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+ const struct nft_set *set, u16 event, u16 flags)
+{
+ u64 timeout = READ_ONCE(set->timeout);
+ u32 gc_int = READ_ONCE(set->gc_int);
+ u32 portid = ctx->portid;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ u32 seq = ctx->seq;
+ int i;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
+ if (set->flags != 0)
+ if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
+ goto nla_put_failure;
+ if (set->flags & NFT_SET_MAP) {
+ if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
+ goto nla_put_failure;
+ }
+ if (set->flags & NFT_SET_OBJECT &&
+ nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
+ goto nla_put_failure;
+
+ if (timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT,
+ nf_jiffies64_to_msecs(timeout),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
+ if (gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
+ goto nla_put_failure;
+
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+ if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+ goto nla_put_failure;
+ }
+
+ if (set->udata &&
+ nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, NFTA_SET_DESC);
+ if (!nest)
+ goto nla_put_failure;
+ if (set->size &&
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ goto nla_put_failure;
+
+ if (set->field_count > 1 &&
+ nf_tables_fill_set_concat(skb, set))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (set->num_exprs == 1) {
+ nest = nla_nest_start_noflag(skb, NFTA_SET_EXPR);
+ if (nf_tables_fill_expr_info(skb, set->exprs[0]) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ } else if (set->num_exprs > 1) {
+ nest = nla_nest_start_noflag(skb, NFTA_SET_EXPRESSIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ for (i = 0; i < set->num_exprs; i++) {
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM,
+ set->exprs[i]) < 0)
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static void nf_tables_set_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set, int event,
+ gfp_t gfp_flags)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
+ if (skb == NULL)
+ goto err;
+
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_set(skb, ctx, set, event, flags);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct nft_set *set;
+ unsigned int idx, s_idx = cb->args[0];
+ struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+ struct net *net = sock_net(skb->sk);
+ struct nft_ctx *ctx = cb->data, ctx_set;
+ struct nftables_pernet *nft_net;
+
+ if (cb->args[1])
+ return skb->len;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (ctx->family != NFPROTO_UNSPEC &&
+ ctx->family != table->family)
+ continue;
+
+ if (ctx->table && ctx->table != table)
+ continue;
+
+ if (cur_table) {
+ if (cur_table != table)
+ continue;
+
+ cur_table = NULL;
+ }
+ idx = 0;
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (!nft_is_active(net, set))
+ goto cont;
+
+ ctx_set = *ctx;
+ ctx_set.table = table;
+ ctx_set.family = table->family;
+
+ if (nf_tables_fill_set(skb, &ctx_set, set,
+ NFT_MSG_NEWSET,
+ NLM_F_MULTI) < 0) {
+ cb->args[0] = idx;
+ cb->args[2] = (unsigned long) table;
+ goto done;
+ }
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ if (s_idx)
+ s_idx = 0;
+ }
+ cb->args[1] = 1;
+done:
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int nf_tables_dump_sets_start(struct netlink_callback *cb)
+{
+ struct nft_ctx *ctx_dump = NULL;
+
+ ctx_dump = kmemdup(cb->data, sizeof(*ctx_dump), GFP_ATOMIC);
+ if (ctx_dump == NULL)
+ return -ENOMEM;
+
+ cb->data = ctx_dump;
+ return 0;
+}
+
+static int nf_tables_dump_sets_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_table *table = NULL;
+ struct net *net = info->net;
+ const struct nft_set *set;
+ struct sk_buff *skb2;
+ struct nft_ctx ctx;
+ int err;
+
+ if (nla[NFTA_SET_TABLE]) {
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+ return PTR_ERR(table);
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_sets_start,
+ .dump = nf_tables_dump_sets,
+ .done = nf_tables_dump_sets_done,
+ .data = &ctx,
+ .module = THIS_MODULE,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ /* Only accept unspec with dump */
+ if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ return -EAFNOSUPPORT;
+ if (!nla[NFTA_SET_TABLE])
+ return -EINVAL;
+
+ set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
+ if (err < 0)
+ goto err_fill_set_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_set_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
+ [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
+};
+
+static int nft_set_desc_concat_parse(const struct nlattr *attr,
+ struct nft_set_desc *desc)
+{
+ struct nlattr *tb[NFTA_SET_FIELD_MAX + 1];
+ u32 len;
+ int err;
+
+ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
+ return -E2BIG;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
+ nft_concat_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_SET_FIELD_LEN])
+ return -EINVAL;
+
+ len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
+ if (!len || len > U8_MAX)
+ return -EINVAL;
+
+ desc->field_len[desc->field_count++] = len;
+
+ return 0;
+}
+
+static int nft_set_desc_concat(struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ u32 num_regs = 0, key_num_regs = 0;
+ struct nlattr *attr;
+ int rem, err, i;
+
+ nla_for_each_nested(attr, nla, rem) {
+ if (nla_type(attr) != NFTA_LIST_ELEM)
+ return -EINVAL;
+
+ err = nft_set_desc_concat_parse(attr, desc);
+ if (err < 0)
+ return err;
+ }
+
+ for (i = 0; i < desc->field_count; i++)
+ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+
+ key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
+ if (key_num_regs != num_regs)
+ return -EINVAL;
+
+ if (num_regs > NFT_REG32_COUNT)
+ return -E2BIG;
+
+ return 0;
+}
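+
+/* Worked example: a concatenation of an IPv4 address (field_len 4) and a
+ * port (field_len 2) needs one 32-bit register per field, so num_regs is
+ * 2 and the key length must be 8 bytes, because every field is padded to
+ * a register boundary in the key.
+ */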
+
+static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla,
+ nft_set_desc_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (da[NFTA_SET_DESC_SIZE] != NULL)
+ desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+ if (da[NFTA_SET_DESC_CONCAT])
+ err = nft_set_desc_concat(desc, da[NFTA_SET_DESC_CONCAT]);
+
+ return err;
+}
+
+static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr * const *nla,
+ struct nft_expr **exprs, int *num_exprs,
+ u32 flags)
+{
+ struct nft_expr *expr;
+ int err, i;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[0] = expr;
+ (*num_exprs)++;
+ } else if (nla[NFTA_SET_EXPRESSIONS]) {
+ struct nlattr *tmp;
+ int left;
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[i++] = expr;
+ (*num_exprs)++;
+ }
+ }
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < *num_exprs; i++)
+ nft_expr_destroy(ctx, exprs[i]);
+
+ return err;
+}
+
+static bool nft_set_is_same(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
+{
+ int i;
+
+ if (set->ktype != desc->ktype ||
+ set->dtype != desc->dtype ||
+ set->flags != flags ||
+ set->klen != desc->klen ||
+ set->dlen != desc->dlen ||
+ set->field_count != desc->field_count ||
+ set->num_exprs != num_exprs)
+ return false;
+
+ for (i = 0; i < desc->field_count; i++) {
+ if (set->field_len[i] != desc->field_len[i])
+ return false;
+ }
+
+ for (i = 0; i < num_exprs; i++) {
+ if (set->exprs[i]->ops != exprs[i]->ops)
+ return false;
+ }
+
+ return true;
+}
+
+static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_set_ops *ops;
+ struct net *net = info->net;
+ struct nft_set_desc desc;
+ struct nft_table *table;
+ unsigned char *udata;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ size_t alloc_size;
+ int num_exprs = 0;
+ char *name;
+ int err, i;
+ u16 udlen;
+ u32 flags;
+ u64 size;
+
+ if (nla[NFTA_SET_TABLE] == NULL ||
+ nla[NFTA_SET_NAME] == NULL ||
+ nla[NFTA_SET_KEY_LEN] == NULL ||
+ nla[NFTA_SET_ID] == NULL)
+ return -EINVAL;
+
+ memset(&desc, 0, sizeof(desc));
+
+ desc.ktype = NFT_DATA_VALUE;
+ if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ return -EINVAL;
+ }
+
+ desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
+ return -EINVAL;
+
+ flags = 0;
+ if (nla[NFTA_SET_FLAGS] != NULL) {
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+ if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
+ NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
+ NFT_SET_MAP | NFT_SET_EVAL |
+ NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR))
+ return -EOPNOTSUPP;
+ /* Only one of these operations is supported */
+ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
+ (NFT_SET_MAP | NFT_SET_OBJECT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+ (NFT_SET_EVAL | NFT_SET_OBJECT))
+ return -EOPNOTSUPP;
+ }
+
+ desc.dtype = 0;
+ if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+ if (!(flags & NFT_SET_MAP))
+ return -EINVAL;
+
+ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ desc.dtype != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ if (desc.dtype != NFT_DATA_VERDICT) {
+ if (nla[NFTA_SET_DATA_LEN] == NULL)
+ return -EINVAL;
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
+ return -EINVAL;
+ } else
+ desc.dlen = sizeof(struct nft_verdict);
+ } else if (flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ if (nla[NFTA_SET_OBJ_TYPE] != NULL) {
+ if (!(flags & NFT_SET_OBJECT))
+ return -EINVAL;
+
+ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (desc.objtype == NFT_OBJECT_UNSPEC ||
+ desc.objtype > NFT_OBJECT_MAX)
+ return -EOPNOTSUPP;
+ } else if (flags & NFT_SET_OBJECT)
+ return -EINVAL;
+ else
+ desc.objtype = NFT_OBJECT_UNSPEC;
+
+ desc.timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
+ if (err)
+ return err;
+ }
+ desc.gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
+ desc.policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL) {
+ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ switch (desc.policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ case NFT_SET_POL_MEMORY:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
+
+ if (desc.field_count > 1) {
+ if (!(flags & NFT_SET_CONCAT))
+ return -EINVAL;
+ } else if (flags & NFT_SET_CONCAT) {
+ return -EINVAL;
+ }
+ } else if (flags & NFT_SET_CONCAT) {
+ return -EINVAL;
+ }
+
+ if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
+ desc.expr = true;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ if (IS_ERR(set)) {
+ if (PTR_ERR(set) != -ENOENT) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ return PTR_ERR(set);
+ }
+ } else {
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ return -EEXIST;
+ }
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
+ if (err < 0)
+ return err;
+
+ err = 0;
+ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ err = -EEXIST;
+ }
+
+ for (i = 0; i < num_exprs; i++)
+ nft_expr_destroy(&ctx, exprs[i]);
+
+ if (err < 0)
+ return err;
+
+ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
+ }
+
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ return -ENOENT;
+
+ ops = nft_select_set_ops(&ctx, nla, &desc);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ udlen = 0;
+ if (nla[NFTA_SET_USERDATA])
+ udlen = nla_len(nla[NFTA_SET_USERDATA]);
+
+ size = 0;
+ if (ops->privsize != NULL)
+ size = ops->privsize(nla, &desc);
+ alloc_size = sizeof(*set) + size + udlen;
+ if (alloc_size < size || alloc_size > INT_MAX)
+ return -ENOMEM;
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
+ set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
+ if (!set) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
+ if (!name) {
+ err = -ENOMEM;
+ goto err_set_name;
+ }
+
+ err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
+ if (err < 0)
+ goto err_set_name;
+
+ udata = NULL;
+ if (udlen) {
+ udata = set->data + size;
+ nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+ }
+
+ INIT_LIST_HEAD(&set->bindings);
+ INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
+ set->table = table;
+ write_pnet(&set->net, net);
+ set->ops = ops;
+ set->ktype = desc.ktype;
+ set->klen = desc.klen;
+ set->dtype = desc.dtype;
+ set->objtype = desc.objtype;
+ set->dlen = desc.dlen;
+ set->flags = flags;
+ set->size = desc.size;
+ set->policy = desc.policy;
+ set->udlen = udlen;
+ set->udata = udata;
+ set->timeout = desc.timeout;
+ set->gc_int = desc.gc_int;
+
+ set->field_count = desc.field_count;
+ for (i = 0; i < desc.field_count; i++)
+ set->field_len[i] = desc.field_len[i];
+
+ err = ops->init(set, &desc, nla);
+ if (err < 0)
+ goto err_set_init;
+
+ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
+ if (err < 0)
+ goto err_set_destroy;
+
+ set->num_exprs = num_exprs;
+ set->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&set->pending_update);
+
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+ if (err < 0)
+ goto err_set_expr_alloc;
+
+ list_add_tail_rcu(&set->list, &table->sets);
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < set->num_exprs; i++)
+ nft_expr_destroy(&ctx, set->exprs[i]);
+err_set_destroy:
+ ops->destroy(&ctx, set);
+err_set_init:
+ kfree(set->name);
+err_set_name:
+ kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
+
+ return err;
+}
+
+static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ struct nft_set_elem_catchall *next, *catchall;
+
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ list_del_rcu(&catchall->list);
+ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
+ kfree_rcu(catchall, rcu);
+ }
+}
+
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
+static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ int i;
+
+ if (WARN_ON(set->use > 0))
+ return;
+
+ for (i = 0; i < set->num_exprs; i++)
+ nft_expr_destroy(ctx, set->exprs[i]);
+
+ set->ops->destroy(ctx, set);
+ nft_set_catchall_destroy(ctx, set);
+ nft_set_put(set);
+}
+
+static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+
+ if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ return -EAFNOSUPPORT;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ if (nla[NFTA_SET_HANDLE]) {
+ attr = nla[NFTA_SET_HANDLE];
+ set = nft_set_lookup_byhandle(table, attr, genmask);
+ } else {
+ attr = nla[NFTA_SET_NAME];
+ set = nft_set_lookup(table, attr, genmask);
+ }
+
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(set);
+ }
+ if (set->use ||
+ (info->nlh->nlmsg_flags & NLM_F_NONREC &&
+ atomic_read(&set->nelems) > 0)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EBUSY;
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nft_delset(&ctx, set);
+}
+
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len);
+
+static int nft_setelem_data_validate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ enum nft_registers dreg;
+
+ dreg = nft_type_to_reg(set->dtype);
+ return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
+ set->dtype == NFT_DATA_VERDICT ?
+ NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen);
+}
+
+static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ return nft_setelem_data_validate(ctx, set, elem);
+}
+
+static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ ret = nft_setelem_data_validate(ctx, set, &elem);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
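+/* Bind a set to an expression. Anonymous sets accept only a single binding;
+ * for maps, every element's data is validated against the binding chain
+ * unless an earlier binding to the same chain already did so.
+ */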
+int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding)
+{
+ struct nft_set_binding *i;
+ struct nft_set_iter iter;
+
+ if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
+ return -EBUSY;
+
+ if (binding->flags & NFT_SET_MAP) {
+ /* If the set is already bound to the same chain, all
+ * jumps have already been validated for that chain.
+ */
+ list_for_each_entry(i, &set->bindings, list) {
+ if (i->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
+ goto bind;
+ }
+
+ iter.genmask = nft_genmask_next(ctx->net);
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nf_tables_bind_check_setelem;
+
+ set->ops->walk(ctx, set, &iter);
+ if (!iter.err)
+ iter.err = nft_set_catchall_bind_check(ctx, set);
+
+ if (iter.err < 0)
+ return iter.err;
+ }
+bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
+ binding->chain = ctx->chain;
+ list_add_tail_rcu(&binding->list, &set->bindings);
+ nft_set_trans_bind(ctx, set);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_tables_bind_set);
+
+static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding, bool event)
+{
+ list_del_rcu(&binding->list);
+
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
+ list_del_rcu(&set->list);
+ if (event)
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+ GFP_KERNEL);
+ }
+}
+
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_catchall_activate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_activate(ctx->net, set, &elem);
+ break;
+ }
+}
+
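+/* Restore the data references of all map elements, e.g. when an aborted
+ * transaction brings a deactivated map back to life.
+ */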
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_activate(ctx, set);
+}
+
+void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
+ nft_clear(ctx->net, set);
+ }
+
+ nft_use_inc_restore(&set->use);
+}
+EXPORT_SYMBOL_GPL(nf_tables_activate_set);
+
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
+ if (nft_set_is_anonymous(set))
+ nft_deactivate_next(ctx->net, set);
+ else
+ list_del_rcu(&binding->list);
+
+ nft_use_dec(&set->use);
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ }
+ nft_use_dec(&set->use);
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_use_dec(&set->use);
+ fallthrough;
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+ phase == NFT_TRANS_COMMIT);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
+ nft_set_destroy(ctx, set);
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+
+const struct nft_set_ext_type nft_set_ext_types[] = {
+ [NFT_SET_EXT_KEY] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_DATA] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_EXPRESSIONS] = {
+ .align = __alignof__(struct nft_set_elem_expr),
+ },
+ [NFT_SET_EXT_OBJREF] = {
+ .len = sizeof(struct nft_object *),
+ .align = __alignof__(struct nft_object *),
+ },
+ [NFT_SET_EXT_FLAGS] = {
+ .len = sizeof(u8),
+ .align = __alignof__(u8),
+ },
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
+ [NFT_SET_EXT_KEY_END] = {
+ .align = __alignof__(u32),
+ },
+};
+
+/*
+ * Set elements
+ */
+
+static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
+ [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_EXPIRATION] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
+ [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
+ [NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
+ [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
+};
+
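+/* A single expression is dumped as NFTA_SET_ELEM_EXPR, multiple expressions
+ * as a nested NFTA_SET_ELEM_EXPRESSIONS list.
+ */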
+static int nft_set_elem_expr_dump(struct sk_buff *skb,
+ const struct nft_set *set,
+ const struct nft_set_ext *ext)
+{
+ struct nft_set_elem_expr *elem_expr;
+ u32 size, num_exprs = 0;
+ struct nft_expr *expr;
+ struct nlattr *nest;
+
+ elem_expr = nft_set_ext_expr(ext);
+ nft_setelem_expr_foreach(expr, elem_expr, size)
+ num_exprs++;
+
+ if (num_exprs == 1) {
+ expr = nft_setelem_expr_at(elem_expr, 0);
+ if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr) < 0)
+ return -1;
+
+ return 0;
+ } else if (num_exprs > 1) {
+ nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_EXPRESSIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ nft_setelem_expr_foreach(expr, elem_expr, size) {
+ expr = nft_setelem_expr_at(elem_expr, size);
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nf_tables_fill_setelem(struct sk_buff *skb,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+ NFT_DATA_VALUE, set->klen) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_KEY_END, nft_set_ext_key_end(ext),
+ NFT_DATA_VALUE, set->klen) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
+ set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
+ nft_set_elem_expr_dump(skb, set, ext))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
+ nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
+ (*nft_set_ext_obj(ext))->key.name) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+ htonl(*nft_set_ext_flags(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ u64 expires, now = get_jiffies_64();
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before64(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ nf_jiffies64_to_msecs(expires),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+struct nft_set_dump_args {
+ const struct netlink_callback *cb;
+ struct nft_set_iter iter;
+ struct sk_buff *skb;
+};
+
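+/* Walk callback for element dumps: skip expired and dead elements, emit
+ * everything else via nf_tables_fill_setelem().
+ */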
+static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_dump_args *args;
+
+ if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
+ return 0;
+
+ args = container_of(iter, struct nft_set_dump_args, iter);
+ return nf_tables_fill_setelem(args->skb, set, elem);
+}
+
+struct nft_set_dump_ctx {
+ const struct nft_set *set;
+ struct nft_ctx ctx;
+};
+
+static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
+ const struct nft_set *set)
+{
+ struct nft_set_elem_catchall *catchall;
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask) ||
+ nft_set_elem_expired(ext))
+ continue;
+
+ elem.priv = catchall->elem;
+ ret = nf_tables_fill_setelem(skb, set, &elem);
+ break;
+ }
+
+ return ret;
+}
+
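+/* Netlink dump worker: table and set are looked up again under RCU on each
+ * invocation and the walk resumes at the cb->args[0] offset stored by the
+ * previous round.
+ */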
+static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+ struct net *net = sock_net(skb->sk);
+ struct nftables_pernet *nft_net;
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nft_set_dump_args args;
+ bool set_found = false;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ u32 portid, seq;
+ int event;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+ dump_ctx->ctx.family != table->family)
+ continue;
+
+ if (table != dump_ctx->ctx.table)
+ continue;
+
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (set == dump_ctx->set) {
+ set_found = true;
+ break;
+ }
+ }
+ break;
+ }
+
+ if (!set_found) {
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
+ portid = NETLINK_CB(cb->skb).portid;
+ seq = cb->nlh->nlmsg_seq;
+
+ nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
+ table->family, NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ args.cb = cb;
+ args.skb = skb;
+ args.iter.genmask = nft_genmask_cur(net);
+ args.iter.skip = cb->args[0];
+ args.iter.count = 0;
+ args.iter.err = 0;
+ args.iter.fn = nf_tables_dump_setelem;
+ set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+
+ if (!args.iter.err && args.iter.count == cb->args[0])
+ args.iter.err = nft_set_catchall_dump(net, skb, set);
+ rcu_read_unlock();
+
+ nla_nest_end(skb, nest);
+ nlmsg_end(skb, nlh);
+
+ if (args.iter.err && args.iter.err != -EMSGSIZE)
+ return args.iter.err;
+ if (args.iter.count == cb->args[0])
+ return 0;
+
+ cb->args[0] = args.iter.count;
+ return skb->len;
+
+nla_put_failure:
+ rcu_read_unlock();
+ return -ENOSPC;
+}
+
+static int nf_tables_dump_set_start(struct netlink_callback *cb)
+{
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+
+ cb->data = kmemdup(dump_ctx, sizeof(*dump_ctx), GFP_ATOMIC);
+
+ return cb->data ? 0 : -ENOMEM;
+}
+
+static int nf_tables_dump_set_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+ const struct nft_ctx *ctx, u32 seq,
+ u32 portid, int event, u16 flags,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ int err;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = nf_tables_fill_setelem(skb, set, elem);
+ if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~(NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
+ return -EOPNOTSUPP;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) ==
+ (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data *key, struct nlattr *attr)
+{
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = NFT_DATA_VALUE_MAXLEN,
+ .len = set->klen,
+ };
+
+ return nft_data_init(ctx, key, &desc, attr);
+}
+
+static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data_desc *desc,
+ struct nft_data *data,
+ struct nlattr *attr)
+{
+ u32 dtype;
+
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
+
+ desc->type = dtype;
+ desc->size = NFT_DATA_VALUE_MAXLEN;
+ desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
+
+ return nft_data_init(ctx, data, desc, attr);
+}
+
+static void *nft_setelem_catchall_get(const struct net *net,
+ const struct nft_set *set)
+{
+ struct nft_set_elem_catchall *catchall;
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set_ext *ext;
+ void *priv = NULL;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask) ||
+ nft_set_elem_expired(ext))
+ continue;
+
+ priv = catchall->elem;
+ break;
+ }
+
+ return priv;
+}
+
+static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_elem *elem, u32 flags)
+{
+ void *priv;
+
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ priv = set->ops->get(ctx->net, set, elem, flags);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+ } else {
+ priv = nft_setelem_catchall_get(ctx->net, set);
+ if (!priv)
+ return -ENOENT;
+ }
+ elem->priv = priv;
+
+ return 0;
+}
+
+static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_set_elem elem;
+ struct sk_buff *skb;
+ u32 flags = 0;
+ int err;
+
+ err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+ return -EINVAL;
+
+ if (nla[NFTA_SET_ELEM_KEY]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ return err;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY_END]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
+ nla[NFTA_SET_ELEM_KEY_END]);
+ if (err < 0)
+ return err;
+ }
+
+ err = nft_setelem_get(ctx, set, &elem, flags);
+ if (err < 0)
+ return err;
+
+ err = -ENOMEM;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ return err;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
+ NFT_MSG_NEWSETELEM, 0, set, &elem);
+ if (err < 0)
+ goto err_fill_setelem;
+
+ return nfnetlink_unicast(skb, ctx->net, ctx->portid);
+
+err_fill_setelem:
+ kfree_skb(skb);
+ return err;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nlattr *attr;
+ struct nft_ctx ctx;
+ int rem, err = 0;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_set_start,
+ .dump = nf_tables_dump_set,
+ .done = nf_tables_dump_set_done,
+ .module = THIS_MODULE,
+ };
+ struct nft_set_dump_ctx dump_ctx = {
+ .set = set,
+ .ctx = ctx,
+ };
+
+ c.data = &dump_ctx;
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&ctx, set, attr);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ break;
+ }
+ }
+
+ return err;
+}
+
+static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+ set, elem);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_net = nft_pernet(net);
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+ int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_elem_set(trans) = set;
+ return trans;
+}
+
+struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nft_expr *expr;
+ int err;
+
+ expr = nft_expr_init(ctx, attr);
+ if (IS_ERR(expr))
+ return expr;
+
+ err = -EOPNOTSUPP;
+ if (expr->ops->type->flags & NFT_EXPR_GC) {
+ if (set->flags & NFT_SET_TIMEOUT)
+ goto err_set_elem_expr;
+ if (!set->ops->gc_init)
+ goto err_set_elem_expr;
+ set->ops->gc_init(set);
+ }
+
+ return expr;
+
+err_set_elem_expr:
+ nft_expr_destroy(ctx, expr);
+ return ERR_PTR(err);
+}
+
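+/* Extension offsets are stored in a u8, so copies into the extension area
+ * are bounded by both the template length and U8_MAX.
+ */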
+static int nft_set_ext_check(const struct nft_set_ext_tmpl *tmpl, u8 id, u32 len)
+{
+ len += nft_set_ext_types[id].len;
+ if (len > tmpl->ext_len[id] ||
+ len > U8_MAX)
+ return -1;
+
+ return 0;
+}
+
+static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id,
+ void *to, const void *from, u32 len)
+{
+ if (nft_set_ext_check(tmpl, id, len) < 0)
+ return -1;
+
+ memcpy(to, from, len);
+
+ return 0;
+}
+
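+/* Allocate an element as a single blob: the backend's private area followed
+ * by the extensions laid out per the template, then fill in key, key_end,
+ * data, timeout and expiration.
+ */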
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *key_end,
+ const u32 *data, u64 timeout, u64 expiration, gfp_t gfp)
+{
+ struct nft_set_ext *ext;
+ void *elem;
+
+ elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+ if (elem == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ ext = nft_set_elem_ext(set, elem);
+ nft_set_ext_init(ext, tmpl);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY,
+ nft_set_ext_key(ext), key, set->klen) < 0)
+ goto err_ext_check;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY_END,
+ nft_set_ext_key_end(ext), key_end, set->klen) < 0)
+ goto err_ext_check;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_DATA,
+ nft_set_ext_data(ext), data, set->dlen) < 0)
+ goto err_ext_check;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+ if (expiration == 0)
+ *nft_set_ext_expiration(ext) += timeout;
+ }
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
+
+ return elem;
+
+err_ext_check:
+ kfree(elem);
+
+ return ERR_PTR(-EINVAL);
+}
+
+static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_expr *expr)
+{
+ if (expr->ops->destroy_clone) {
+ expr->ops->destroy_clone(ctx, expr);
+ module_put(expr->ops->type->owner);
+ } else {
+ nf_tables_expr_destroy(ctx, expr);
+ }
+}
+
+static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_set_elem_expr *elem_expr)
+{
+ struct nft_expr *expr;
+ u32 size;
+
+ nft_setelem_expr_foreach(expr, elem_expr, size)
+ __nft_set_elem_expr_destroy(ctx, expr);
+}
+
+/* Drop references and destroy. Called from gc, dynset and abort path. */
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
+
+ nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_release(nft_set_ext_data(ext), set->dtype);
+ if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
+ nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
+/* Destroy element. References have already been dropped in the preparation
+ * path via nft_setelem_data_deactivate().
+ */
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
+ nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
+
+ kfree(elem);
+}
+
+int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_expr *expr_array[])
+{
+ struct nft_expr *expr;
+ int err, i, k;
+
+ for (i = 0; i < set->num_exprs; i++) {
+ expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
+ if (!expr)
+ goto err_expr;
+
+ err = nft_expr_clone(expr, set->exprs[i]);
+ if (err < 0) {
+ kfree(expr);
+ goto err_expr;
+ }
+ expr_array[i] = expr;
+ }
+
+ return 0;
+
+err_expr:
+ for (k = i - 1; k >= 0; k--)
+ nft_expr_destroy(ctx, expr_array[k]);
+
+ return -ENOMEM;
+}
+
+static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
+ const struct nft_set_ext_tmpl *tmpl,
+ const struct nft_set_ext *ext,
+ struct nft_expr *expr_array[],
+ u32 num_exprs)
+{
+ struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext);
+ u32 len = sizeof(struct nft_set_elem_expr);
+ struct nft_expr *expr;
+ int i, err;
+
+ if (num_exprs == 0)
+ return 0;
+
+ for (i = 0; i < num_exprs; i++)
+ len += expr_array[i]->ops->size;
+
+ if (nft_set_ext_check(tmpl, NFT_SET_EXT_EXPRESSIONS, len) < 0)
+ return -EINVAL;
+
+ for (i = 0; i < num_exprs; i++) {
+ expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
+ err = nft_expr_clone(expr, expr_array[i]);
+ if (err < 0)
+ goto err_elem_expr_setup;
+
+ elem_expr->size += expr_array[i]->ops->size;
+ nft_expr_destroy(ctx, expr_array[i]);
+ expr_array[i] = NULL;
+ }
+
+ return 0;
+
+err_elem_expr_setup:
+ for (; i < num_exprs; i++) {
+ nft_expr_destroy(ctx, expr_array[i]);
+ expr_array[i] = NULL;
+ }
+
+ return -ENOMEM;
+}
+
+struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+ const struct nft_set *set)
+{
+ struct nft_set_elem_catchall *catchall;
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (nft_set_elem_active(ext, genmask) &&
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
+ return ext;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
+
+static int nft_setelem_catchall_insert(const struct net *net,
+ struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **pext)
+{
+ struct nft_set_elem_catchall *catchall;
+ u8 genmask = nft_genmask_next(net);
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (nft_set_elem_active(ext, genmask)) {
+ *pext = ext;
+ return -EEXIST;
+ }
+ }
+
+ catchall = kmalloc(sizeof(*catchall), GFP_KERNEL);
+ if (!catchall)
+ return -ENOMEM;
+
+ catchall->elem = elem->priv;
+ list_add_tail_rcu(&catchall->list, &set->catchall_list);
+
+ return 0;
+}
+
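+/* Catchall elements live on a per-set list; everything else goes through
+ * the backend's ->insert() hook.
+ */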
+static int nft_setelem_insert(const struct net *net,
+ struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext, unsigned int flags)
+{
+ int ret;
+
+ if (flags & NFT_SET_ELEM_CATCHALL)
+ ret = nft_setelem_catchall_insert(net, set, elem, ext);
+ else
+ ret = set->ops->insert(net, set, elem, ext);
+
+ return ret;
+}
+
+static bool nft_setelem_is_catchall(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL)
+ return true;
+
+ return false;
+}
+
+static void nft_setelem_activate(struct net *net, struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (nft_setelem_is_catchall(set, elem))
+ nft_set_elem_change_active(net, set, ext);
+ else
+ set->ops->activate(net, set, elem);
+}
+
+static int nft_setelem_catchall_deactivate(const struct net *net,
+ struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_is_active_next(net, ext))
+ continue;
+
+ kfree(elem->priv);
+ elem->priv = catchall->elem;
+ nft_set_elem_change_active(net, set, ext);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int __nft_setelem_deactivate(const struct net *net,
+ struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ void *priv;
+
+ priv = set->ops->deactivate(net, set, elem);
+ if (!priv)
+ return -ENOENT;
+
+ kfree(elem->priv);
+ elem->priv = priv;
+ set->ndeact++;
+
+ return 0;
+}
+
+static int nft_setelem_deactivate(const struct net *net,
+ struct nft_set *set,
+ struct nft_set_elem *elem, u32 flags)
+{
+ int ret;
+
+ if (flags & NFT_SET_ELEM_CATCHALL)
+ ret = nft_setelem_catchall_deactivate(net, set, elem);
+ else
+ ret = __nft_setelem_deactivate(net, set, elem);
+
+ return ret;
+}
+
+static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall)
+{
+ list_del_rcu(&catchall->list);
+ kfree_rcu(catchall, rcu);
+}
+
+static void nft_setelem_catchall_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_elem_catchall *catchall, *next;
+
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ if (catchall->elem == elem->priv) {
+ nft_setelem_catchall_destroy(catchall);
+ break;
+ }
+ }
+}
+
+static void nft_setelem_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ if (nft_setelem_is_catchall(set, elem))
+ nft_setelem_catchall_remove(net, set, elem);
+ else
+ set->ops->remove(net, set, elem);
+}
+
+static bool nft_setelem_valid_key_end(const struct nft_set *set,
+ struct nlattr **nla, u32 flags)
+{
+ if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) ==
+ (NFT_SET_CONCAT | NFT_SET_INTERVAL)) {
+ if (flags & NFT_SET_ELEM_INTERVAL_END)
+ return false;
+
+ if (nla[NFTA_SET_ELEM_KEY_END] &&
+ flags & NFT_SET_ELEM_CATCHALL)
+ return false;
+ } else {
+ if (nla[NFTA_SET_ELEM_KEY_END])
+ return false;
+ }
+
+ return true;
+}
+
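+/* Build one element from its netlink description: parse flags, key, data,
+ * timeout and expressions, lay out the extension template, allocate and
+ * insert the element, and queue a NEWSETELEM transaction on success.
+ */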
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr, u32 nlmsg_flags)
+{
+ struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ u8 genmask = nft_genmask_next(ctx->net);
+ u32 flags = 0, size = 0, num_exprs = 0;
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext, *ext2;
+ struct nft_set_elem elem;
+ struct nft_set_binding *binding;
+ struct nft_object *obj = NULL;
+ struct nft_userdata *udata;
+ struct nft_data_desc desc;
+ enum nft_registers dreg;
+ struct nft_trans *trans;
+ u64 timeout;
+ u64 expiration;
+ int err, i;
+ u8 ulen;
+
+ err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ nft_set_ext_prepare(&tmpl);
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) ||
+ (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY]))
+ return -EINVAL;
+
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
+
+ if (set->flags & NFT_SET_MAP) {
+ if (nla[NFTA_SET_ELEM_DATA] == NULL &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ return -EINVAL;
+ }
+
+ if (set->flags & NFT_SET_OBJECT) {
+ if (!nla[NFTA_SET_ELEM_OBJREF] &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_OBJREF])
+ return -EINVAL;
+ }
+
+ if (!nft_setelem_valid_key_end(set, nla, flags))
+ return -EINVAL;
+
+ if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
+ (nla[NFTA_SET_ELEM_DATA] ||
+ nla[NFTA_SET_ELEM_OBJREF] ||
+ nla[NFTA_SET_ELEM_TIMEOUT] ||
+ nla[NFTA_SET_ELEM_EXPIRATION] ||
+ nla[NFTA_SET_ELEM_USERDATA] ||
+ nla[NFTA_SET_ELEM_EXPR] ||
+ nla[NFTA_SET_ELEM_KEY_END] ||
+ nla[NFTA_SET_ELEM_EXPRESSIONS]))
+ return -EINVAL;
+
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT],
+ &timeout);
+ if (err)
+ return err;
+ } else if (set->flags & NFT_SET_TIMEOUT &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
+ timeout = READ_ONCE(set->timeout);
+ }
+
+ expiration = 0;
+ if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
+ &expiration);
+ if (err)
+ return err;
+ }
+
+ if (nla[NFTA_SET_ELEM_EXPR]) {
+ struct nft_expr *expr;
+
+ if (set->num_exprs && set->num_exprs != 1)
+ return -EOPNOTSUPP;
+
+ expr = nft_set_elem_expr_alloc(ctx, set,
+ nla[NFTA_SET_ELEM_EXPR]);
+ if (IS_ERR(expr))
+ return PTR_ERR(expr);
+
+ expr_array[0] = expr;
+ num_exprs = 1;
+
+ if (set->num_exprs && set->exprs[0]->ops != expr->ops) {
+ err = -EOPNOTSUPP;
+ goto err_set_elem_expr;
+ }
+ } else if (nla[NFTA_SET_ELEM_EXPRESSIONS]) {
+ struct nft_expr *expr;
+ struct nlattr *tmp;
+ int left;
+
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX ||
+ (set->num_exprs && set->num_exprs == i)) {
+ err = -E2BIG;
+ goto err_set_elem_expr;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_elem_expr;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_elem_expr;
+ }
+ expr_array[i] = expr;
+ num_exprs++;
+
+ if (set->num_exprs && expr->ops != set->exprs[i]->ops) {
+ err = -EOPNOTSUPP;
+ goto err_set_elem_expr;
+ }
+ i++;
+ }
+ if (set->num_exprs && set->num_exprs != i) {
+ err = -EOPNOTSUPP;
+ goto err_set_elem_expr;
+ }
+ } else if (set->num_exprs > 0 &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
+ err = nft_set_elem_expr_clone(ctx, set, expr_array);
+ if (err < 0)
+ goto err_set_elem_expr_clone;
+
+ num_exprs = set->num_exprs;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ goto err_set_elem_expr;
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto err_parse_key;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY_END]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
+ nla[NFTA_SET_ELEM_KEY_END]);
+ if (err < 0)
+ goto err_parse_key;
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
+
+ if (timeout > 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (err < 0)
+ goto err_parse_key_end;
+
+ if (timeout != READ_ONCE(set->timeout)) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
+ }
+
+ if (num_exprs) {
+ for (i = 0; i < num_exprs; i++)
+ size += expr_array[i]->ops->size;
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
+ sizeof(struct nft_set_elem_expr) + size);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
+
+ if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
+ obj = nft_obj_lookup(ctx->net, ctx->table,
+ nla[NFTA_SET_ELEM_OBJREF],
+ set->objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
+
+ if (nla[NFTA_SET_ELEM_DATA] != NULL) {
+ err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val,
+ nla[NFTA_SET_ELEM_DATA]);
+ if (err < 0)
+ goto err_parse_key_end;
+
+ dreg = nft_type_to_reg(set->dtype);
+ list_for_each_entry(binding, &set->bindings, list) {
+ struct nft_ctx bind_ctx = {
+ .net = ctx->net,
+ .family = ctx->family,
+ .table = ctx->table,
+ .chain = (struct nft_chain *)binding->chain,
+ };
+
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
+ err = nft_validate_register_store(&bind_ctx, dreg,
+ &elem.data.val,
+ desc.type, desc.len);
+ if (err < 0)
+ goto err_parse_data;
+
+ if (desc.type == NFT_DATA_VERDICT &&
+ (elem.data.val.verdict.code == NFT_GOTO ||
+ elem.data.val.verdict.code == NFT_JUMP))
+ nft_validate_state_update(ctx->net,
+ NFT_VALIDATE_NEED);
+ }
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ if (err < 0)
+ goto err_parse_data;
+ }
+
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefore it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0) {
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ if (err < 0)
+ goto err_parse_data;
+ }
+ }
+
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
+ elem.key_end.val.data, elem.data.val.data,
+ timeout, expiration, GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(elem.priv)) {
+ err = PTR_ERR(elem.priv);
+ goto err_parse_data;
+ }
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
+ if (obj)
+ *nft_set_ext_obj(ext) = obj;
+
+ if (ulen > 0) {
+ if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
+ err = -EINVAL;
+ goto err_elem_free;
+ }
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+ }
+ err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
+ if (err < 0)
+ goto err_elem_free;
+
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+ if (trans == NULL) {
+ err = -ENOMEM;
+ goto err_elem_free;
+ }
+
+ ext->genmask = nft_genmask_cur(ctx->net);
+
+ err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
+ if (err) {
+ if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+ nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+ goto err_element_clash;
+ if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+ memcmp(nft_set_ext_data(ext),
+ nft_set_ext_data(ext2), set->dlen) != 0) ||
+ (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
+ *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
+ goto err_element_clash;
+ else if (!(nlmsg_flags & NLM_F_EXCL))
+ err = 0;
+ } else if (err == -ENOTEMPTY) {
+ /* ENOTEMPTY reports an overlap between this element
+ * and an existing one.
+ */
+ err = -EEXIST;
+ }
+ goto err_element_clash;
+ }
+
+ if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
+ err = -ENFILE;
+ goto err_set_full;
+ }
+
+ nft_trans_elem(trans) = elem;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+ return 0;
+
+err_set_full:
+ nft_setelem_remove(ctx->net, set, &elem);
+err_element_clash:
+ kfree(trans);
+err_elem_free:
+ nf_tables_set_elem_destroy(ctx, set, elem.priv);
+err_parse_data:
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ nft_data_release(&elem.data.val, desc.type);
+err_parse_key_end:
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
+err_parse_key:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
+err_set_elem_expr:
+ for (i = 0; i < num_exprs && expr_array[i]; i++)
+ nft_expr_destroy(ctx, expr_array[i]);
+err_set_elem_expr_clone:
+ return err;
+}
+
+static int nf_tables_newsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ int rem, err;
+
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
+ nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return err;
+ }
+ }
+
+ if (nft_net->validate_state == NFT_VALIDATE_DO)
+ return nft_table_validate(net, table);
+
+ return 0;
+}
+
+/**
+ * nft_data_hold - hold a nft_data item
+ *
+ * @data: struct nft_data to hold
+ * @type: type of data
+ *
+ * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
+ * NFT_GOTO verdicts. This function must be called on active data objects
+ * from the second phase of the commit protocol.
+ */
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+{
+ struct nft_chain *chain;
+
+ if (type == NFT_DATA_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ nft_use_inc_restore(&chain->use);
+ break;
+ }
+ }
+}
+
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_hold(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+}
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_release(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+}
+
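+/* Deletion builds a temporary element carrying only the key so the backend
+ * can find and deactivate the real one; the actual removal happens later,
+ * at commit time.
+ */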
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ struct nft_trans *trans;
+ u32 flags = 0;
+ int err;
+
+ err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+ return -EINVAL;
+
+ if (!nft_setelem_valid_key_end(set, nla, flags))
+ return -EINVAL;
+
+ nft_set_ext_prepare(&tmpl);
+
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ return err;
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto fail_elem;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY_END]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
+ nla[NFTA_SET_ELEM_KEY_END]);
+ if (err < 0)
+ goto fail_elem;
+
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto fail_elem_key_end;
+ }
+
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
+ elem.key_end.val.data, NULL, 0, 0,
+ GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(elem.priv)) {
+ err = PTR_ERR(elem.priv);
+ goto fail_elem_key_end;
+ }
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+ if (trans == NULL)
+ goto fail_trans;
+
+ err = nft_setelem_deactivate(ctx->net, set, &elem, flags);
+ if (err < 0)
+ goto fail_ops;
+
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+
+ nft_trans_elem(trans) = elem;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+ return 0;
+
+fail_ops:
+ kfree(trans);
+fail_trans:
+ kfree(elem.priv);
+fail_elem_key_end:
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
+fail_elem:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
+ return err;
+}
+
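+/* Walk callback used by set flush. The walk may run in atomic context,
+ * hence GFP_ATOMIC here; the catchall variant below runs from process
+ * context and can use GFP_KERNEL.
+ */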
+static int nft_setelem_flush(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+ sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ if (!trans)
+ return -ENOMEM;
+
+ if (!set->ops->flush(ctx->net, set, elem->priv)) {
+ err = -ENOENT;
+ goto err1;
+ }
+ set->ndeact++;
+
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_trans_elem_set(trans) = set;
+ nft_trans_elem(trans) = *elem;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+err1:
+ kfree(trans);
+ return err;
+}
+
+static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_elem *elem)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+ sizeof(struct nft_trans_elem), GFP_KERNEL);
+ if (!trans)
+ return -ENOMEM;
+
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_trans_elem_set(trans) = set;
+ nft_trans_elem(trans) = *elem;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+}
+
+static int nft_set_catchall_flush(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ ret = __nft_set_catchall_flush(ctx, set, &elem);
+ if (ret < 0)
+ break;
+ nft_set_elem_change_active(ctx->net, set, ext);
+ }
+
+ return ret;
+}
+
+static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
+{
+ struct nft_set_iter iter = {
+ .genmask = genmask,
+ .fn = nft_setelem_flush,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ if (!iter.err)
+ iter.err = nft_set_catchall_flush(ctx, set);
+
+ return iter.err;
+}
+
+static int nf_tables_delsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ int rem, err = 0;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return nft_set_flush(&ctx, set, genmask);
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_del_setelem(&ctx, set, attr);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ break;
+ }
+ }
+ return err;
+}
+
+/*
+ * Stateful objects
+ */
+
+/**
+ * nft_register_obj - register nf_tables stateful object type
+ * @obj_type: object type
+ *
+ * Registers the object type for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_obj(struct nft_object_type *obj_type)
+{
+ if (obj_type->type == NFT_OBJECT_UNSPEC)
+ return -EINVAL;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_rcu(&obj_type->list, &nf_tables_objects);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_obj);
+
+/**
+ * nft_unregister_obj - unregister nf_tables object type
+ * @obj_type: object type
+ *
+ * Unregisters the object type for use with nf_tables.
+ */
+void nft_unregister_obj(struct nft_object_type *obj_type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&obj_type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_obj);
+
+struct nft_object *nft_obj_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla, u32 objtype,
+ u8 genmask)
+{
+ struct nft_object_hash_key k = { .table = table };
+ char search[NFT_OBJ_MAXNAMELEN];
+ struct rhlist_head *tmp, *list;
+ struct nft_object *obj;
+
+ nla_strscpy(search, nla, sizeof(search));
+ k.name = search;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() &&
+ !lockdep_commit_lock_is_held(net));
+
+ rcu_read_lock();
+ list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params);
+ if (!list)
+ goto out;
+
+ rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) {
+ if (objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask)) {
+ rcu_read_unlock();
+ return obj;
+ }
+ }
+out:
+ rcu_read_unlock();
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nft_obj_lookup);
+
+static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
+{
+ struct nft_object *obj;
+
+ list_for_each_entry(obj, &table->objects, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
+ objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask))
+ return obj;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
+ [NFTA_OBJ_TABLE] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
+ [NFTA_OBJ_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
+ [NFTA_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_OBJ_DATA] = { .type = NLA_NESTED },
+ [NFTA_OBJ_HANDLE] = { .type = NLA_U64 },
+ [NFTA_OBJ_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
+};
+
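+/* Parse the type-specific attributes, pick the ops (via ->select_ops() if
+ * the type provides it) and allocate and initialize the object.
+ */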
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
+ const struct nlattr *attr)
+{
+ struct nlattr **tb;
+ const struct nft_object_ops *ops;
+ struct nft_object *obj;
+ int err = -ENOMEM;
+
+ tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL);
+ if (!tb)
+ goto err1;
+
+ if (attr) {
+ err = nla_parse_nested_deprecated(tb, type->maxattr, attr,
+ type->policy, NULL);
+ if (err < 0)
+ goto err2;
+ } else {
+ memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
+ }
+
+ if (type->select_ops) {
+ ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err2;
+ }
+ } else {
+ ops = type->ops;
+ }
+
+ err = -ENOMEM;
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
+ if (!obj)
+ goto err2;
+
+ err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
+ if (err < 0)
+ goto err3;
+
+ obj->ops = ops;
+
+ kfree(tb);
+ return obj;
+err3:
+ kfree(obj);
+err2:
+ kfree(tb);
+err1:
+ return ERR_PTR(err);
+}
+
+static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
+ struct nft_object *obj, bool reset)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (obj->ops->dump(skb, obj, reset) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+{
+ const struct nft_object_type *type;
+
+ list_for_each_entry(type, &nf_tables_objects, list) {
+ if (objtype == type->type)
+ return type;
+ }
+ return NULL;
+}
+
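+/* Like __nft_obj_type_get() but takes a module reference; with
+ * CONFIG_MODULES an unknown type may trigger module autoloading, in which
+ * case -EAGAIN tells the caller to restart.
+ */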
+static const struct nft_object_type *
+nft_obj_type_get(struct net *net, u32 objtype)
+{
+ const struct nft_object_type *type;
+
+ type = __nft_obj_type_get(objtype);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+ lockdep_nfnl_nft_mutex_not_held();
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ if (nft_request_module(net, "nft-obj-%u", objtype) == -EAGAIN)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
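+/* Object updates are not applied in place: the new state is parsed into a
+ * separate object and swapped in at commit time via the transaction.
+ */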
+static int nf_tables_updobj(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
+ const struct nlattr *attr,
+ struct nft_object *obj)
+{
+ struct nft_object *newobj;
+ struct nft_trans *trans;
+ int err = -ENOMEM;
+
+ if (!try_module_get(type->owner))
+ return -ENOENT;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
+ sizeof(struct nft_trans_obj));
+ if (!trans)
+ goto err_trans;
+
+ newobj = nft_obj_init(ctx, type, attr);
+ if (IS_ERR(newobj)) {
+ err = PTR_ERR(newobj);
+ goto err_free_trans;
+ }
+
+ nft_trans_obj(trans) = obj;
+ nft_trans_obj_update(trans) = true;
+ nft_trans_obj_newobj(trans) = newobj;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+
+err_free_trans:
+ kfree(trans);
+err_trans:
+ module_put(type->owner);
+ return err;
+}
+
+static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_object_type *type;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct nft_object *obj;
+ struct nft_ctx ctx;
+ u32 objtype;
+ int err;
+
+ if (!nla[NFTA_OBJ_TYPE] ||
+ !nla[NFTA_OBJ_NAME] ||
+ !nla[NFTA_OBJ_DATA])
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err != -ENOENT) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+ return err;
+ }
+ } else {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+ return -EEXIST;
+ }
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ type = __nft_obj_type_get(objtype);
+ if (WARN_ON_ONCE(!type))
+ return -ENOENT;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
+ type = nft_obj_type_get(net, objtype);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
+
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_init;
+ }
+ obj->key.table = table;
+ obj->handle = nf_tables_alloc_handle(table);
+
+ obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
+ if (!obj->key.name) {
+ err = -ENOMEM;
+ goto err_strdup;
+ }
+
+ if (nla[NFTA_OBJ_USERDATA]) {
+ obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL_ACCOUNT);
+ if (obj->udata == NULL) {
+ err = -ENOMEM;
+ goto err_userdata;
+ }
+
+ obj->udlen = nla_len(nla[NFTA_OBJ_USERDATA]);
+ }
+
+ err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
+ if (err < 0)
+ goto err_trans;
+
+ err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
+ nft_objname_ht_params);
+ if (err < 0)
+ goto err_obj_ht;
+
+ list_add_tail_rcu(&obj->list, &table->objects);
+
+ return 0;
+err_obj_ht:
+ /* queued in transaction log */
+ INIT_LIST_HEAD(&obj->list);
+ return err;
+err_trans:
+ kfree(obj->udata);
+err_userdata:
+ kfree(obj->key.name);
+err_strdup:
+ if (obj->ops->destroy)
+ obj->ops->destroy(&ctx, obj);
+ kfree(obj);
+err_init:
+ module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
+ return err;
+}
+
+static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
+ struct nft_object *obj, bool reset)
+{
+ struct nlmsghdr *nlh;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
+ nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
+ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
+ NFTA_OBJ_PAD))
+ goto nla_put_failure;
+
+ if (obj->udata &&
+ nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static void audit_log_obj_reset(const struct nft_table *table,
+ unsigned int base_seq, unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
+struct nft_obj_filter {
+ char *table;
+ u32 type;
+};
+
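+/* Dump all objects matching the optional table/type filter. The walk runs
+ * under rcu_read_lock() and cb->args[0] records how many entries have been
+ * dumped so far, so an interrupted dump resumes at the right position.
+ */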
+static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_table *table;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct nft_obj_filter *filter = cb->data;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
+ unsigned int entries = 0;
+ struct nft_object *obj;
+ bool reset = false;
+ int rc = 0;
+
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ entries = 0;
+ list_for_each_entry_rcu(obj, &table->objects, list) {
+ if (!nft_is_active(net, obj))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table &&
+ strcmp(filter->table, table->name))
+ goto cont;
+ if (filter &&
+ filter->type != NFT_OBJECT_UNSPEC &&
+ obj->ops->type->type != filter->type)
+ goto cont;
+
+ rc = nf_tables_fill_obj_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWOBJ,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, table,
+ obj, reset);
+ if (rc < 0)
+ break;
+
+ entries++;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ if (reset && entries)
+ audit_log_obj_reset(table, nft_net->base_seq, entries);
+ if (rc < 0)
+ break;
+ }
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_obj_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_obj_filter *filter = NULL;
+
+ if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
+
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+
+ if (nla[NFTA_OBJ_TYPE])
+ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ }
+
+ cb->data = filter;
+ return 0;
+}
+
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+{
+ struct nft_obj_filter *filter = cb->data;
+
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
+
+ return 0;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_table *table;
+ struct net *net = info->net;
+ struct nft_object *obj;
+ struct sk_buff *skb2;
+ bool reset = false;
+ u32 objtype;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
+ .dump = nf_tables_dump_obj,
+ .done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!nla[NFTA_OBJ_NAME] ||
+ !nla[NFTA_OBJ_TYPE])
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (IS_ERR(obj)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+ return PTR_ERR(obj);
+ }
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb2)
+ return -ENOMEM;
+
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
+
+ if (reset) {
+ const struct nftables_pernet *nft_net;
+ char *buf;
+
+ nft_net = nft_pernet(net);
+ buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+
+ audit_log_nfcfg(buf,
+ family,
+ 1,
+ AUDIT_NFT_OP_OBJ_RESET,
+ GFP_ATOMIC);
+ kfree(buf);
+ }
+
+ err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+ family, table, obj, reset);
+ if (err < 0)
+ goto err_fill_obj_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_obj_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+{
+ if (obj->ops->destroy)
+ obj->ops->destroy(ctx, obj);
+
+ module_put(obj->ops->type->owner);
+ kfree(obj->key.name);
+ kfree(obj->udata);
+ kfree(obj);
+}
+
+static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_object *obj;
+ struct nft_ctx ctx;
+ u32 objtype;
+
+ if (!nla[NFTA_OBJ_TYPE] ||
+ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+ if (nla[NFTA_OBJ_HANDLE]) {
+ attr = nla[NFTA_OBJ_HANDLE];
+ obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
+ } else {
+ attr = nla[NFTA_OBJ_NAME];
+ obj = nft_obj_lookup(net, table, attr, objtype, genmask);
+ }
+
+ if (IS_ERR(obj)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(obj);
+ }
+ if (obj->use > 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EBUSY;
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nft_delobj(&ctx, obj);
+}
+
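+/* Build an object notification and queue it to the per-netns notify list;
+ * delivery happens at the end of the transaction. Nothing is queued when
+ * there are no listeners and no explicit report was requested.
+ */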
+static void
+__nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct sk_buff *skb;
+ int err;
+
+ if (!report &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_obj_info(skb, net, portid, seq, event,
+ flags & (NLM_F_CREATE | NLM_F_EXCL),
+ family, table, obj, false);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_notify_enqueue(skb, report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+void nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ char *buf = kasprintf(gfp, "%s:%u",
+ table->name, nft_net->base_seq);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ gfp);
+ kfree(buf);
+
+ __nft_obj_notify(net, table, obj, portid, seq, event,
+ flags, family, report, gfp);
+}
+EXPORT_SYMBOL_GPL(nft_obj_notify);
+
+static void nf_tables_obj_notify(const struct nft_ctx *ctx,
+ struct nft_object *obj, int event)
+{
+ __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->flags, ctx->family,
+ ctx->report, GFP_KERNEL);
+}
+
+/*
+ * Flow tables
+ */
+void nft_register_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail_rcu(&type->list, &nf_tables_flowtables);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
+
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
+
+static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
+ [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
+ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
+};
+
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ if (!nla_strcmp(nla, flowtable->name) &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
+
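+/* Release the reference a rule holds on a flowtable when the rule is
+ * deactivated: the use counter is dropped in the prepare, abort and
+ * release phases and left untouched otherwise.
+ */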
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ case NFT_TRANS_PREPARE:
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ nft_use_dec(&flowtable->use);
+ fallthrough;
+ default:
+ return;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
+static struct nft_flowtable *
+nft_flowtable_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+struct nft_flowtable_hook {
+ u32 num;
+ int priority;
+ struct list_head list;
+};
+
+static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
+ [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED },
+};
+
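+/* Parse the NFTA_FLOWTABLE_HOOK attribute. For a new flowtable, hook
+ * number and priority are mandatory and only NF_NETDEV_INGRESS is
+ * supported; for an existing flowtable they may be omitted, but when
+ * present they must match the current configuration.
+ */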
+static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct nft_flowtable_hook *flowtable_hook,
+ struct nft_flowtable *flowtable, bool add)
+{
+ struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
+ struct nft_hook *hook;
+ int hooknum, priority;
+ int err;
+
+ INIT_LIST_HEAD(&flowtable_hook->list);
+
+ err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
+ nft_flowtable_hook_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (add) {
+ if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
+ !tb[NFTA_FLOWTABLE_HOOK_PRIORITY])
+ return -EINVAL;
+
+ hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+ if (hooknum != NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+
+ priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+
+ flowtable_hook->priority = priority;
+ flowtable_hook->num = hooknum;
+ } else {
+ if (tb[NFTA_FLOWTABLE_HOOK_NUM]) {
+ hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+ if (hooknum != flowtable->hooknum)
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) {
+ priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+ if (priority != flowtable->data.priority)
+ return -EOPNOTSUPP;
+ }
+
+ flowtable_hook->priority = flowtable->data.priority;
+ flowtable_hook->num = flowtable->hooknum;
+ }
+
+ if (tb[NFTA_FLOWTABLE_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(ctx->net,
+ tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ &flowtable_hook->list);
+ if (err < 0)
+ return err;
+ }
+
+ list_for_each_entry(hook, &flowtable_hook->list, list) {
+ hook->ops.pf = NFPROTO_NETDEV;
+ hook->ops.hooknum = flowtable_hook->num;
+ hook->ops.priority = flowtable_hook->priority;
+ hook->ops.priv = &flowtable->data;
+ hook->ops.hook = flowtable->data.type->hook;
+ }
+
+ return err;
+}
+
+static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+{
+ const struct nf_flowtable_type *type;
+
+ list_for_each_entry(type, &nf_tables_flowtables, list) {
+ if (family == type->family)
+ return type;
+ }
+ return NULL;
+}
+
+static const struct nf_flowtable_type *
+nft_flowtable_type_get(struct net *net, u8 family)
+{
+ const struct nf_flowtable_type *type;
+
+ type = __nft_flowtable_type_get(family);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+ lockdep_nfnl_nft_mutex_not_held();
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ if (nft_request_module(net, "nf-flowtable-%u", family) == -EAGAIN)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+/* Only called from error and netdev event paths. */
+static void nft_unregister_flowtable_hook(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nft_hook *hook)
+{
+ nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+}
+
+static void __nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list,
+ bool release_netdev)
+{
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
+}
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ __nft_unregister_flowtable_net_hooks(net, hook_list, false);
+}
+
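+/* Bind and register the netdev hooks of a flowtable. A device may belong
+ * to at most one active flowtable within the same table, hence -EEXIST;
+ * on error the hooks registered so far are torn down again.
+ */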
+static int nft_register_flowtable_net_hooks(struct net *net,
+ struct nft_table *table,
+ struct list_head *hook_list,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *hook2, *next;
+ struct nft_flowtable *ft;
+ int err, i = 0;
+
+ list_for_each_entry(hook, hook_list, list) {
+ list_for_each_entry(ft, &table->flowtables, list) {
+ if (!nft_is_active_next(net, ft))
+ continue;
+
+ list_for_each_entry(hook2, &ft->hook_list, list) {
+ if (hook->ops.dev == hook2->ops.dev &&
+ hook->ops.pf == hook2->ops.pf) {
+ err = -EEXIST;
+ goto err_unregister_net_hooks;
+ }
+ }
+ }
+
+ err = flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_BIND);
+ if (err < 0)
+ goto err_unregister_net_hooks;
+
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0) {
+ flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ goto err_unregister_net_hooks;
+ }
+
+ i++;
+ }
+
+ return 0;
+
+err_unregister_net_hooks:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+
+ return err;
+}
+
+static void nft_flowtable_hooks_destroy(struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+}
+
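+/* Update an existing flowtable: hooks that are already present are dropped
+ * from the request, the remaining ones are registered now and attached to
+ * the transaction so that commit or abort can finalise or undo them.
+ */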
+static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
+ struct nft_flowtable *flowtable)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_flowtable_hook flowtable_hook;
+ struct nft_hook *hook, *next;
+ struct nft_trans *trans;
+ bool unregister = false;
+ u32 flags;
+ int err;
+
+ err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
+ &flowtable_hook, flowtable, false);
+ if (err < 0)
+ return err;
+
+ list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
+ if (nft_hook_list_find(&flowtable->hook_list, hook)) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ }
+
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
+ if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
+ } else {
+ flags = flowtable->data.flags;
+ }
+
+ err = nft_register_flowtable_net_hooks(ctx->net, ctx->table,
+ &flowtable_hook.list, flowtable);
+ if (err < 0)
+ goto err_flowtable_update_hook;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWFLOWTABLE,
+ sizeof(struct nft_trans_flowtable));
+ if (!trans) {
+ unregister = true;
+ err = -ENOMEM;
+ goto err_flowtable_update_hook;
+ }
+
+ nft_trans_flowtable_flags(trans) = flags;
+ nft_trans_flowtable(trans) = flowtable;
+ nft_trans_flowtable_update(trans) = true;
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans));
+
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+
+err_flowtable_update_hook:
+ list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
+ if (unregister)
+ nft_unregister_flowtable_hook(ctx->net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+
+ return err;
+}
+
+static int nf_tables_newflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nft_flowtable_hook flowtable_hook;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nf_flowtable_type *type;
+ struct nft_flowtable *flowtable;
+ struct nft_hook *hook, *next;
+ struct net *net = info->net;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+ int err;
+
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ !nla[NFTA_FLOWTABLE_NAME] ||
+ !nla[NFTA_FLOWTABLE_HOOK])
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable)) {
+ err = PTR_ERR(flowtable);
+ if (err != -ENOENT) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
+ return err;
+ }
+ } else {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
+ return -EEXIST;
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nft_flowtable_update(&ctx, info->nlh, flowtable);
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
+
+ flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&flowtable->hook_list);
+
+ flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
+ if (!flowtable->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ type = nft_flowtable_type_get(net, family);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err2;
+ }
+
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flowtable->data.flags =
+ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+ if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err3;
+ }
+ }
+
+ write_pnet(&flowtable->data.net, net);
+ flowtable->data.type = type;
+ err = type->init(&flowtable->data);
+ if (err < 0)
+ goto err3;
+
+ err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+ &flowtable_hook, flowtable, true);
+ if (err < 0)
+ goto err4;
+
+ list_splice(&flowtable_hook.list, &flowtable->hook_list);
+ flowtable->data.priority = flowtable_hook.priority;
+ flowtable->hooknum = flowtable_hook.num;
+
+ err = nft_register_flowtable_net_hooks(ctx.net, table,
+ &flowtable->hook_list,
+ flowtable);
+ if (err < 0) {
+ nft_flowtable_hooks_destroy(&flowtable->hook_list);
+ goto err4;
+ }
+
+ err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (err < 0)
+ goto err5;
+
+ list_add_tail_rcu(&flowtable->list, &table->flowtables);
+
+ return 0;
+err5:
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+err4:
+ flowtable->data.type->free(&flowtable->data);
+err3:
+ module_put(type->owner);
+err2:
+ kfree(flowtable->name);
+err1:
+ kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
+ return err;
+}
+
+static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook)
+{
+ struct nft_hook *this, *next;
+
+ list_for_each_entry_safe(this, next, &flowtable_hook->list, list) {
+ list_del(&this->list);
+ kfree(this);
+ }
+}
+
+static int nft_delflowtable_hook(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
+ struct nft_hook *this, *hook;
+ struct nft_trans *trans;
+ int err;
+
+ err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
+ &flowtable_hook, flowtable, false);
+ if (err < 0)
+ return err;
+
+ list_for_each_entry(this, &flowtable_hook.list, list) {
+ hook = nft_hook_list_find(&flowtable->hook_list, this);
+ if (!hook) {
+ err = -ENOENT;
+ goto err_flowtable_del_hook;
+ }
+ list_move(&hook->list, &flowtable_del_list);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
+ sizeof(struct nft_trans_flowtable));
+ if (!trans) {
+ err = -ENOMEM;
+ goto err_flowtable_del_hook;
+ }
+
+ nft_trans_flowtable(trans) = flowtable;
+ nft_trans_flowtable_update(trans) = true;
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
+ nft_flowtable_hook_release(&flowtable_hook);
+
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+
+err_flowtable_del_hook:
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
+ nft_flowtable_hook_release(&flowtable_hook);
+
+ return err;
+}
+
+static int nf_tables_delflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct net *net = info->net;
+ const struct nlattr *attr;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ (!nla[NFTA_FLOWTABLE_NAME] &&
+ !nla[NFTA_FLOWTABLE_HANDLE]))
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
+ return PTR_ERR(table);
+ }
+
+ if (nla[NFTA_FLOWTABLE_HANDLE]) {
+ attr = nla[NFTA_FLOWTABLE_HANDLE];
+ flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
+ } else {
+ attr = nla[NFTA_FLOWTABLE_NAME];
+ flowtable = nft_flowtable_lookup(table, attr, genmask);
+ }
+
+ if (IS_ERR(flowtable)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return PTR_ERR(flowtable);
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ if (nla[NFTA_FLOWTABLE_HOOK])
+ return nft_delflowtable_hook(&ctx, flowtable);
+
+ if (flowtable->use > 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EBUSY;
+ }
+
+ return nft_delflowtable(&ctx, flowtable);
+}
+
+static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
+ struct nft_flowtable *flowtable,
+ struct list_head *hook_list)
+{
+ struct nlattr *nest, *nest_devs;
+ struct nft_hook *hook;
+ struct nlmsghdr *nlh;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
+ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+ NFTA_FLOWTABLE_PAD) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
+ if (!nest)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
+ goto nla_put_failure;
+
+ nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
+
+ list_for_each_entry_rcu(hook, hook_list, list) {
+ if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest_devs);
+ nla_nest_end(skb, nest);
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+struct nft_flowtable_filter {
+ char *table;
+};
+
+static int nf_tables_dump_flowtable(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nft_flowtable_filter *filter = cb->data;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nftables_pernet *nft_net;
+ const struct nft_table *table;
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ if (!nft_is_active(net, flowtable))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table &&
+ strcmp(filter->table, table->name))
+ goto cont;
+
+ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family,
+ flowtable,
+ &flowtable->hook_list) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_flowtable_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_flowtable_filter *filter = NULL;
+
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
+
+ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
+ GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+
+ cb->data = filter;
+ return 0;
+}
+
+static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
+{
+ struct nft_flowtable_filter *filter = cb->data;
+
+ if (!filter)
+ return 0;
+
+ kfree(filter->table);
+ kfree(filter);
+
+ return 0;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ const struct nft_table *table;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ int err;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_flowtable_start,
+ .dump = nf_tables_dump_flowtable,
+ .done = nf_tables_dump_flowtable_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!nla[NFTA_FLOWTABLE_NAME])
+ return -EINVAL;
+
+ table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+ genmask, 0);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE, 0, family,
+ flowtable, &flowtable->hook_list);
+ if (err < 0)
+ goto err_fill_flowtable_info;
+
+ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+
+err_fill_flowtable_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ struct list_head *hook_list,
+ int event)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct sk_buff *skb;
+ u16 flags = 0;
+ int err;
+
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
+ ctx->seq, event, flags,
+ ctx->family, flowtable, hook_list);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *next;
+
+ flowtable->data.type->free(&flowtable->data);
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ list_del_rcu(&hook->list);
+ kfree(hook);
+ }
+ kfree(flowtable->name);
+ module_put(flowtable->data.type->owner);
+ kfree(flowtable);
+}
+
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nlmsghdr *nlh;
+ char buf[TASK_COMM_LEN];
+ int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
+
+ nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static void nft_flowtable_event(unsigned long event, struct net_device *dev,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ /* flow_offload_netdev_event() cleans up entries for us. */
+ nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ break;
+ }
+}
+
+static int nf_tables_flowtable_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_flowtable *flowtable;
+ struct nftables_pernet *nft_net;
+ struct nft_table *table;
+ struct net *net;
+
+ if (event != NETDEV_UNREGISTER)
+ return 0;
+
+ net = dev_net(dev);
+ nft_net = nft_pernet(net);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ nft_flowtable_event(event, dev, flowtable);
+ }
+ }
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_flowtable_notifier = {
+ .notifier_call = nf_tables_flowtable_event,
+};
+
+static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
+ int event)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct sk_buff *skb2;
+ int err;
+
+ if (!nlmsg_report(nlh) &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return;
+
+ skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ goto err;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0) {
+ kfree_skb(skb2);
+ goto err;
+ }
+
+ nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ nlmsg_report(nlh), GFP_KERNEL);
+ return;
+err:
+ nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ -ENOBUFS);
+}
+
+static int nf_tables_getgen(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_gen_info(skb2, info->net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq);
+ if (err < 0)
+ goto err_fill_gen_info;
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+
+err_fill_gen_info:
+ kfree_skb(skb2);
+ return err;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+ [NFT_MSG_NEWTABLE] = {
+ .call = nf_tables_newtable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_GETTABLE] = {
+ .call = nf_tables_gettable,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_DELTABLE] = {
+ .call = nf_tables_deltable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_NEWCHAIN] = {
+ .call = nf_tables_newchain,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_GETCHAIN] = {
+ .call = nf_tables_getchain,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_DELCHAIN] = {
+ .call = nf_tables_delchain,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_NEWRULE] = {
+ .call = nf_tables_newrule,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_GETRULE] = {
+ .call = nf_tables_getrule,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_DELRULE] = {
+ .call = nf_tables_delrule,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_NEWSET] = {
+ .call = nf_tables_newset,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_GETSET] = {
+ .call = nf_tables_getset,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_DELSET] = {
+ .call = nf_tables_delset,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_NEWSETELEM] = {
+ .call = nf_tables_newsetelem,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_GETSETELEM] = {
+ .call = nf_tables_getsetelem,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_DELSETELEM] = {
+ .call = nf_tables_delsetelem,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_GETGEN] = {
+ .call = nf_tables_getgen,
+ .type = NFNL_CB_RCU,
+ },
+ [NFT_MSG_NEWOBJ] = {
+ .call = nf_tables_newobj,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_GETOBJ] = {
+ .call = nf_tables_getobj,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_DELOBJ] = {
+ .call = nf_tables_delobj,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_GETOBJ_RESET] = {
+ .call = nf_tables_getobj,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
+ [NFT_MSG_NEWFLOWTABLE] = {
+ .call = nf_tables_newflowtable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_GETFLOWTABLE] = {
+ .call = nf_tables_getflowtable,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_DELFLOWTABLE] = {
+ .call = nf_tables_delflowtable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+};
+
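+/* Run any pending ruleset validation. A failure is reported as -EAGAIN so
+ * that the batch can be replayed with per-message validation, pinpointing
+ * the offending rule.
+ */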
+static int nf_tables_validate(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_table *table;
+
+ switch (nft_net->validate_state) {
+ case NFT_VALIDATE_SKIP:
+ break;
+ case NFT_VALIDATE_NEED:
+ nft_validate_state_update(net, NFT_VALIDATE_DO);
+ fallthrough;
+ case NFT_VALIDATE_DO:
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (nft_table_validate(net, table) < 0)
+ return -EAGAIN;
+ }
+
+ nft_validate_state_update(net, NFT_VALIDATE_SKIP);
+ break;
+ }
+
+ return 0;
+}
+
+/* A drop policy has to be deferred until all rules have been activated;
+ * otherwise a large ruleset that contains a drop-policy base chain would
+ * drop all packets until the full transaction has been processed. Hence
+ * the policy is only switched to NF_DROP once the transaction has been
+ * finalized.
+ */
+static void nft_chain_commit_drop_policy(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_policy(trans) != NF_DROP)
+ return;
+
+ if (!nft_is_base_chain(trans->ctx.chain))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ basechain->policy = NF_DROP;
+}
+
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_name(trans)) {
+ rhltable_remove(&trans->ctx.table->chains_ht,
+ &trans->ctx.chain->rhlhead,
+ nft_chain_ht_params);
+ swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
+ rhltable_insert_key(&trans->ctx.table->chains_ht,
+ trans->ctx.chain->name,
+ &trans->ctx.chain->rhlhead,
+ nft_chain_ht_params);
+ }
+
+ if (!nft_is_base_chain(trans->ctx.chain))
+ return;
+
+ nft_chain_stats_replace(trans);
+
+ basechain = nft_base_chain(trans->ctx.chain);
+
+ switch (nft_trans_chain_policy(trans)) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ basechain->policy = nft_trans_chain_policy(trans);
+ break;
+ }
+}
+
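+/* Apply a committed object update: the object type copies the new state
+ * over the live object, then the temporary object is released.
+ */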
+static void nft_obj_commit_update(struct nft_trans *trans)
+{
+ struct nft_object *newobj;
+ struct nft_object *obj;
+
+ obj = nft_trans_obj(trans);
+ newobj = nft_trans_obj_newobj(trans);
+
+ if (obj->ops->update)
+ obj->ops->update(obj, newobj);
+
+ nft_obj_destroy(&trans->ctx, newobj);
+}
+
+static void nft_commit_release(struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ free_percpu(nft_trans_chain_stats(trans));
+ kfree(nft_trans_chain_name(trans));
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_DELRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_DELSET:
+ nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ break;
+ case NFT_MSG_DELSETELEM:
+ nf_tables_set_elem_destroy(&trans->ctx,
+ nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
+ case NFT_MSG_DELOBJ:
+ nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ if (nft_trans_flowtable_update(trans))
+ nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+ else
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
+ }
+
+ if (trans->put_net)
+ put_net(trans->ctx.net);
+
+ kfree(trans);
+}
+
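+/* Deferred release of committed transactions: wait for one RCU grace
+ * period so that no packet path reader can still see the released
+ * objects, then free them outside of the commit mutex.
+ */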
+static void nf_tables_trans_destroy_work(struct work_struct *w)
+{
+ struct nft_trans *trans, *next;
+ LIST_HEAD(head);
+
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_init(&nf_tables_destroy_list, &head);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ if (list_empty(&head))
+ return;
+
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &head, list) {
+ nft_trans_list_del(trans);
+ nft_commit_release(trans);
+ }
+}
+
+void nf_tables_trans_destroy_flush_work(void)
+{
+ flush_work(&trans_destroy_work);
+}
+EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
+
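+/* Register-tracking based expression reduction is currently disabled, so
+ * every expression is reported as non-reducible and copied verbatim into
+ * the rule blob.
+ */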
+static bool nft_expr_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ return false;
+}
+
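+/* Build the next-generation rule blob for this chain: all rules active in
+ * the new generation are flattened into one contiguous allocation,
+ * terminated by a marker rule, so the packet path can evaluate them
+ * without chasing list pointers.
+ */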
+static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
+{
+ const struct nft_expr *expr, *last;
+ struct nft_regs_track track = {};
+ unsigned int size, data_size;
+ void *data, *data_boundary;
+ struct nft_rule_dp *prule;
+ struct nft_rule *rule;
+
+ /* already handled or inactive chain? */
+ if (chain->blob_next || !nft_is_active_next(net, chain))
+ return 0;
+
+ data_size = 0;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (nft_is_active_next(net, rule)) {
+ data_size += sizeof(*prule) + rule->dlen;
+ if (data_size > INT_MAX)
+ return -ENOMEM;
+ }
+ }
+ data_size += offsetof(struct nft_rule_dp, data); /* last rule */
+
+ chain->blob_next = nf_tables_chain_alloc_rules(data_size);
+ if (!chain->blob_next)
+ return -ENOMEM;
+
+ data = (void *)chain->blob_next->data;
+ data_boundary = data + data_size;
+ size = 0;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+
+ prule = (struct nft_rule_dp *)data;
+ data += offsetof(struct nft_rule_dp, data);
+ if (WARN_ON_ONCE(data > data_boundary))
+ return -ENOMEM;
+
+ size = 0;
+ track.last = nft_expr_last(rule);
+ nft_rule_for_each_expr(expr, last, rule) {
+ track.cur = expr;
+
+ if (nft_expr_reduce(&track, expr)) {
+ expr = track.cur;
+ continue;
+ }
+
+ if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary))
+ return -ENOMEM;
+
+ memcpy(data + size, expr, expr->ops->size);
+ size += expr->ops->size;
+ }
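+ /* dlen is a 12-bit field, larger expression data cannot be represented */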
+ if (WARN_ON_ONCE(size >= 1 << 12))
+ return -ENOMEM;
+
+ prule->handle = rule->handle;
+ prule->dlen = size;
+ prule->is_last = 0;
+
+ data += size;
+ size = 0;
+ chain->blob_next->size += (unsigned long)(data - (void *)prule);
+ }
+
+ prule = (struct nft_rule_dp *)data;
+ data += offsetof(struct nft_rule_dp, data);
+ if (WARN_ON_ONCE(data > data_boundary))
+ return -ENOMEM;
+
+ nft_last_rule(chain->blob_next, prule);
+
+ return 0;
+}
+
+static void nf_tables_commit_chain_prepare_cancel(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
+
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ struct nft_chain *chain = trans->ctx.chain;
+
+ if (trans->msg_type == NFT_MSG_NEWRULE ||
+ trans->msg_type == NFT_MSG_DELRULE) {
+ kvfree(chain->blob_next);
+ chain->blob_next = NULL;
+ }
+ }
+}
+
+static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
+{
+ struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
+
+ kvfree(o->blob);
+}
+
+static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob)
+{
+ struct nft_rules_old *old;
+
+ /* rcu_head is after end marker */
+ old = (void *)blob + sizeof(*blob) + blob->size;
+ old->blob = blob;
+
+ call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
+}
+
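+/* Publish the prepared rule blob: blob_gen_0 and blob_gen_1 hold the two
+ * generations visible to the packet path, selected by the generation
+ * cursor. Whichever blob is no longer referenced afterwards is freed
+ * after an RCU grace period.
+ */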
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
+{
+ struct nft_rule_blob *g0, *g1;
+ bool next_genbit;
+
+ next_genbit = nft_gencursor_next(net);
+
+ g0 = rcu_dereference_protected(chain->blob_gen_0,
+ lockdep_commit_lock_is_held(net));
+ g1 = rcu_dereference_protected(chain->blob_gen_1,
+ lockdep_commit_lock_is_held(net));
+
+ /* No changes to this chain? */
+ if (chain->blob_next == NULL) {
+ /* chain had no change in last or next generation */
+ if (g0 == g1)
+ return;
+ /*
+ * chain had no change in this generation; make sure next
+ * one uses same rules as current generation.
+ */
+ if (next_genbit) {
+ rcu_assign_pointer(chain->blob_gen_1, g0);
+ nf_tables_commit_chain_free_rules_old(g1);
+ } else {
+ rcu_assign_pointer(chain->blob_gen_0, g1);
+ nf_tables_commit_chain_free_rules_old(g0);
+ }
+
+ return;
+ }
+
+ if (next_genbit)
+ rcu_assign_pointer(chain->blob_gen_1, chain->blob_next);
+ else
+ rcu_assign_pointer(chain->blob_gen_0, chain->blob_next);
+
+ chain->blob_next = NULL;
+
+ if (g0 == g1)
+ return;
+
+ if (next_genbit)
+ nf_tables_commit_chain_free_rules_old(g1);
+ else
+ nf_tables_commit_chain_free_rules_old(g0);
+}
+
+static void nft_obj_del(struct nft_object *obj)
+{
+ rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params);
+ list_del_rcu(&obj->list);
+}
+
+void nft_chain_del(struct nft_chain *chain)
+{
+ struct nft_table *table = chain->table;
+
+ WARN_ON_ONCE(rhltable_remove(&table->chains_ht, &chain->rhlhead,
+ nft_chain_ht_params));
+ list_del_rcu(&chain->list);
+}
+
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ nft_setelem_remove(ctx->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, &elem))
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = nft_pernet(trans->net);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for a race with the transaction path, otherwise this batch
+ * refers to stale objects that might not be there anymore. Also skip
+ * this batch if the set has been destroyed by a control plane
+ * transaction, i.e. the gc worker lost the race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
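+/* Async gc worker: drain the global gc list and free the expired elements
+ * of every batch that did not lose the race against a concurrent
+ * transaction.
+ */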
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_gc_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ trans->net = maybe_get_net(net);
+ if (!trans->net) {
+ kfree(trans);
+ return NULL;
+ }
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->seq = gc_seq;
+
+ return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct nft_set *set;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ set = gc->set;
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ struct nft_set *set;
+
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ set = gc->set;
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+ if (nft_set_elem_is_dead(ext))
+ goto dead_elem;
+
+ nft_set_elem_dead(ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ return NULL;
+
+ nft_trans_gc_elem_add(gc, catchall->elem);
+ }
+
+ return gc;
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+ struct nft_set_elem_catchall *catchall, *next;
+ const struct nft_set *set = gc->set;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
+
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+ if (!gc)
+ return NULL;
+
+ memset(&elem, 0, sizeof(elem));
+ elem.priv = catchall->elem;
+
+ nft_setelem_data_deactivate(gc->net, gc->set, &elem);
+ nft_setelem_catchall_destroy(catchall);
+ nft_trans_gc_elem_add(gc, elem.priv);
+ }
+
+ return gc;
+}
+
+static void nf_tables_module_autoload_cleanup(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_module_request *req, *next;
+
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+ list_for_each_entry_safe(req, next, &nft_net->module_list, list) {
+ WARN_ON_ONCE(!req->done);
+ list_del(&req->list);
+ kfree(req);
+ }
+}
+
+static void nf_tables_commit_release(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans;
+
+ /* All side effects have to be made visible: for example, if a chain
+ * named 'foo' has been deleted, a new transaction must not find it
+ * anymore.
+ *
+ * Memory reclaim happens asynchronously from the work queue to avoid
+ * an expensive synchronize_rcu() in the commit phase.
+ */
+ if (list_empty(&nft_net->commit_list)) {
+ nf_tables_module_autoload_cleanup(net);
+ mutex_unlock(&nft_net->commit_mutex);
+ return;
+ }
+
+ trans = list_last_entry(&nft_net->commit_list,
+ struct nft_trans, list);
+ get_net(trans->ctx.net);
+ WARN_ON_ONCE(trans->put_net);
+
+ trans->put_net = true;
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ nf_tables_module_autoload_cleanup(net);
+ schedule_work(&trans_destroy_work);
+
+ mutex_unlock(&nft_net->commit_mutex);
+}
+
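+/* Coalesce the notifications queued by this transaction into batch skbs
+ * of up to NLMSG_GOODSIZE each, as long as they share the same report
+ * setting, then send them out.
+ */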
+static void nft_commit_notify(struct net *net, u32 portid)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct sk_buff *batch_skb = NULL, *nskb, *skb;
+ unsigned char *data;
+ int len;
+
+ list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) {
+ if (!batch_skb) {
+new_batch:
+ batch_skb = skb;
+ len = NLMSG_GOODSIZE - skb->len;
+ list_del(&skb->list);
+ continue;
+ }
+ len -= skb->len;
+ if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) {
+ data = skb_put(batch_skb, skb->len);
+ memcpy(data, skb->data, skb->len);
+ list_del(&skb->list);
+ kfree_skb(skb);
+ continue;
+ }
+ nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES,
+ NFT_CB(batch_skb).report, GFP_KERNEL);
+ goto new_batch;
+ }
+
+ if (batch_skb) {
+ nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES,
+ NFT_CB(batch_skb).report, GFP_KERNEL);
+ }
+
+ WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
+}
+
+static int nf_tables_commit_audit_alloc(struct list_head *adl,
+ struct nft_table *table)
+{
+ struct nft_audit_data *adp;
+
+ list_for_each_entry(adp, adl, list) {
+ if (adp->table == table)
+ return 0;
+ }
+ adp = kzalloc(sizeof(*adp), GFP_KERNEL);
+ if (!adp)
+ return -ENOMEM;
+ adp->table = table;
+ list_add(&adp->list, adl);
+ return 0;
+}
+
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+ struct nft_audit_data *adp, *adn;
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
+static void nf_tables_commit_audit_collect(struct list_head *adl,
+ struct nft_table *table, u32 op)
+{
+ struct nft_audit_data *adp;
+
+ list_for_each_entry(adp, adl, list) {
+ if (adp->table == table)
+ goto found;
+ }
+ WARN_ONCE(1, "table=%s not expected in commit list", table->name);
+ return;
+found:
+ adp->entries++;
+ if (!adp->op || adp->op > op)
+ adp->op = op;
+}
+
+#define AUNFTABLENAMELEN (NFT_TABLE_MAXNAMELEN + 22)
+
+static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
+{
+ struct nft_audit_data *adp, *adn;
+ char aubuf[AUNFTABLENAMELEN];
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ snprintf(aubuf, AUNFTABLENAMELEN, "%s:%u", adp->table->name,
+ generation);
+ audit_log_nfcfg(aubuf, adp->table->family, adp->entries,
+ nft2audit_op[adp->op], GFP_KERNEL);
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->commit || set->dead)
+ continue;
+
+ set->ops->commit(set);
+ }
+}
+
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+ unsigned int gc_seq;
+
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+ return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
+
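+/* Commit phase of a transaction batch. Once step 2 has made the new rule
+ * blobs visible to the packet path, the commit must not fail anymore;
+ * every fallible operation happens in the preparation steps before it.
+ */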
+static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
+ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
+ struct nft_chain *chain;
+ struct nft_table *table;
+ LIST_HEAD(adl);
+ int err;
+
+ if (list_empty(&nft_net->commit_list)) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return 0;
+ }
+
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)) &&
+ !nft_trans_chain_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound chain\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
+ /* 0. Validate ruleset, otherwise roll back for error reporting. */
+ if (nf_tables_validate(net) < 0)
+ return -EAGAIN;
+
+ err = nft_flow_rule_offload_commit(net);
+ if (err < 0)
+ return err;
+
+ /* 1. Allocate space for next generation rules_gen_X[] */
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ int ret;
+
+ ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
+ if (ret) {
+ nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
+ return ret;
+ }
+ if (trans->msg_type == NFT_MSG_NEWRULE ||
+ trans->msg_type == NFT_MSG_DELRULE) {
+ chain = trans->ctx.chain;
+
+ ret = nf_tables_commit_chain_prepare(net, chain);
+ if (ret < 0) {
+ nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
+ return ret;
+ }
+ }
+ }
+
+ /* step 2. Make rules_gen_X visible to packet path */
+ list_for_each_entry(table, &nft_net->tables, list) {
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_commit_chain(net, chain);
+ }
+
+ /*
+ * Bump generation counter, invalidate any dump in progress.
+ * Cannot fail after this point.
+ */
+ base_seq = READ_ONCE(nft_net->base_seq);
+ while (++base_seq == 0)
+ ;
+
+ WRITE_ONCE(nft_net->base_seq, base_seq);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+ /* step 3. Start new generation, rules_gen_X now in use. */
+ net->nft.gencursor = nft_gencursor_next(net);
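+ /* From here on, the packet path evaluates the blobs published in step 2. */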
+
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ nf_tables_commit_audit_collect(&adl, trans->ctx.table,
+ trans->msg_type);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
+ nf_tables_table_disable(net, trans->ctx.table);
+
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ } else {
+ nft_clear(net, trans->ctx.table);
+ }
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELTABLE:
+ list_del_rcu(&trans->ctx.table->list);
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ nft_chain_commit_update(trans);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ /* trans destroyed after rcu grace period */
+ } else {
+ nft_chain_commit_drop_policy(trans);
+ nft_clear(net, trans->ctx.chain);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ nft_chain_del(trans->ctx.chain);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_DELRULE);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_COMMIT);
+
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ struct nft_set *set = nft_trans_set(trans);
+
+ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+ } else {
+ nft_clear(net, nft_trans_set(trans));
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ nft_use_dec(&trans->ctx.table->use);
+ }
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
+ nft_trans_set(trans)->dead = 1;
+ list_del_rcu(&nft_trans_set(trans)->list);
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET, GFP_KERNEL);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ te = (struct nft_trans_elem *)trans->data;
+
+ nft_setelem_activate(net, te->set, &te->elem);
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
+ NFT_MSG_NEWSETELEM);
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ te = (struct nft_trans_elem *)trans->data;
+
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
+ NFT_MSG_DELSETELEM);
+ nft_setelem_remove(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem)) {
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
+ break;
+ case NFT_MSG_NEWOBJ:
+ if (nft_trans_obj_update(trans)) {
+ nft_obj_commit_update(trans);
+ nf_tables_obj_notify(&trans->ctx,
+ nft_trans_obj(trans),
+ NFT_MSG_NEWOBJ);
+ } else {
+ nft_clear(net, nft_trans_obj(trans));
+ nf_tables_obj_notify(&trans->ctx,
+ nft_trans_obj(trans),
+ NFT_MSG_NEWOBJ);
+ nft_trans_destroy(trans);
+ }
+ break;
+ case NFT_MSG_DELOBJ:
+ nft_obj_del(nft_trans_obj(trans));
+ nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+ NFT_MSG_DELOBJ);
+ break;
+ case NFT_MSG_NEWFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+ nft_trans_flowtable(trans)->data.flags =
+ nft_trans_flowtable_flags(trans);
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ &nft_trans_flowtable_hooks(trans),
+ NFT_MSG_NEWFLOWTABLE);
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
+ } else {
+ nft_clear(net, nft_trans_flowtable(trans));
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ &nft_trans_flowtable(trans)->hook_list,
+ NFT_MSG_NEWFLOWTABLE);
+ }
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ &nft_trans_flowtable_hooks(trans),
+ NFT_MSG_DELFLOWTABLE);
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable_hooks(trans));
+ } else {
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ &nft_trans_flowtable(trans)->hook_list,
+ NFT_MSG_DELFLOWTABLE);
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable(trans)->hook_list);
+ }
+ break;
+ }
+ }
+
+ nft_set_commit_update(&set_update_list);
+
+ nft_commit_notify(net, NETLINK_CB(skb).portid);
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+ nf_tables_commit_release(net);
+
+ return 0;
+}
+
+static void nf_tables_module_autoload(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_module_request *req, *next;
+ LIST_HEAD(module_list);
+
+ list_splice_init(&nft_net->module_list, &module_list);
+ mutex_unlock(&nft_net->commit_mutex);
+ list_for_each_entry_safe(req, next, &module_list, list) {
+ request_module("%s", req->module);
+ req->done = true;
+ }
+ mutex_lock(&nft_net->commit_mutex);
+ list_splice(&module_list, &nft_net->module_list);
+}
+
+static void nf_tables_abort_release(struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ nf_tables_chain_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv, true);
+ break;
+ case NFT_MSG_NEWOBJ:
+ nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ break;
+ case NFT_MSG_NEWFLOWTABLE:
+ if (nft_trans_flowtable_update(trans))
+ nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+ else
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->abort)
+ continue;
+
+ set->ops->abort(set);
+ }
+}
+
+static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
+
+ if (action == NFNL_ABORT_VALIDATE &&
+ nf_tables_validate(net) < 0)
+ return -EAGAIN;
+
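+ /* Undo newest-first, so each rollback step sees the state that the
+ * aborted operation left behind.
+ */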
+ list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
+ list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
+ nf_tables_table_disable(net, trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+ }
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ nft_trans_destroy(trans);
+ } else {
+ list_del_rcu(&trans->ctx.table->list);
+ }
+ break;
+ case NFT_MSG_DELTABLE:
+ nft_clear(trans->ctx.net, trans->ctx.table);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ free_percpu(nft_trans_chain_stats(trans));
+ kfree(nft_trans_chain_name(trans));
+ nft_trans_destroy(trans);
+ } else {
+ if (nft_trans_chain_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_chain_del(trans->ctx.chain);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, trans->ctx.chain);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+ if (nft_trans_rule_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ nft_use_dec_restore(&trans->ctx.chain->use);
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_ABORT);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ case NFT_MSG_DELRULE:
+ nft_use_inc_restore(&trans->ctx.chain->use);
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ nft_use_dec_restore(&trans->ctx.table->use);
+ if (nft_trans_set_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ nft_trans_set(trans)->dead = 1;
+ list_del_rcu(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ te = (struct nft_trans_elem *)trans->data;
+ nft_setelem_remove(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
+ break;
+ case NFT_MSG_DELSETELEM:
+ te = (struct nft_trans_elem *)trans->data;
+
+ nft_setelem_data_activate(net, te->set, &te->elem);
+ nft_setelem_activate(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ te->set->ndeact--;
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWOBJ:
+ if (nft_trans_obj_update(trans)) {
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
+ nft_trans_destroy(trans);
+ } else {
+ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_obj_del(nft_trans_obj(trans));
+ }
+ break;
+ case NFT_MSG_DELOBJ:
+ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable_hooks(trans));
+ } else {
+ nft_use_dec_restore(&trans->ctx.table->use);
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable(trans)->hook_list);
+ }
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
+ } else {
+ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ }
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ nft_set_abort_update(&set_update_list);
+
+ synchronize_rcu();
+
+ list_for_each_entry_safe_reverse(trans, next,
+ &nft_net->commit_list, list) {
+ nft_trans_list_del(trans);
+ nf_tables_abort_release(trans);
+ }
+
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
+ return 0;
+}
+
+static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+ enum nfnl_abort_action action)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ unsigned int gc_seq;
+ int ret;
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
+static bool nf_tables_valid_genid(struct net *net, u32 genid)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ bool genid_ok;
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ genid_ok = genid == 0 || nft_net->base_seq == genid;
+ if (!genid_ok)
+ mutex_unlock(&nft_net->commit_mutex);
+
+ /* else, commit mutex has to be released by commit or abort function */
+ return genid_ok;
+}
+
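+/*
+ * ->valid_genid runs first for each batch and keeps commit_mutex held on
+ * success; ->commit or ->abort then releases it, so the whole netlink
+ * batch executes under one transaction lock.
+ */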
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+ .name = "nf_tables",
+ .subsys_id = NFNL_SUBSYS_NFTABLES,
+ .cb_count = NFT_MSG_MAX,
+ .cb = nf_tables_cb,
+ .commit = nf_tables_commit,
+ .abort = nf_tables_abort,
+ .valid_genid = nf_tables_valid_genid,
+ .owner = THIS_MODULE,
+};
+
+int nft_chain_validate_dependency(const struct nft_chain *chain,
+ enum nft_chain_types type)
+{
+ const struct nft_base_chain *basechain;
+
+ if (nft_is_base_chain(chain)) {
+ basechain = nft_base_chain(chain);
+ if (basechain->type->type != type)
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);
+
+int nft_chain_validate_hooks(const struct nft_chain *chain,
+ unsigned int hook_flags)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_is_base_chain(chain)) {
+ basechain = nft_base_chain(chain);
+
+ if ((1 << basechain->ops.hooknum) & hook_flags)
+ return 0;
+
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
+
+/*
+ * Loop detection - walk through the ruleset beginning at the destination chain
+ * of a new jump until either the source chain is reached (loop) or all
+ * reachable chains have been traversed.
+ *
+ * The loop check is performed whenever a new jump verdict is added to an
+ * expression or verdict map, or when a verdict map is bound to a new chain.
+ */
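+/*
+ * Example: with "jump chain-a" in chain input and "jump chain-b" in
+ * chain-a, adding "jump input" to chain-b is rejected with -ELOOP: the
+ * walk starting at the new destination (input) reaches the source chain.
+ */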
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+ const struct nft_chain *chain);
+
+static int nft_check_loops(const struct nft_ctx *ctx,
+ const struct nft_set_ext *ext)
+{
+ const struct nft_data *data;
+ int ret;
+
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ ret = nf_tables_check_loops(ctx, data->verdict.chain);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
+ return nft_check_loops(ctx, ext);
+}
+
+static int nft_set_catchall_loops(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_ext *ext;
+ int ret = 0;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ ret = nft_check_loops(ctx, ext);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+ const struct nft_chain *chain)
+{
+ const struct nft_rule *rule;
+ const struct nft_expr *expr, *last;
+ struct nft_set *set;
+ struct nft_set_binding *binding;
+ struct nft_set_iter iter;
+
+ if (ctx->chain == chain)
+ return -ELOOP;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ nft_rule_for_each_expr(expr, last, rule) {
+ struct nft_immediate_expr *priv;
+ const struct nft_data *data;
+ int err;
+
+ if (strcmp(expr->ops->type->name, "immediate"))
+ continue;
+
+ priv = nft_expr_priv(expr);
+ if (priv->dreg != NFT_REG_VERDICT)
+ continue;
+
+ data = &priv->data;
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ err = nf_tables_check_loops(ctx,
+ data->verdict.chain);
+ if (err < 0)
+ return err;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ list_for_each_entry(set, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
+ if (!(set->flags & NFT_SET_MAP) ||
+ set->dtype != NFT_DATA_VERDICT)
+ continue;
+
+ list_for_each_entry(binding, &set->bindings, list) {
+ if (!(binding->flags & NFT_SET_MAP) ||
+ binding->chain != chain)
+ continue;
+
+ iter.genmask = nft_genmask_next(ctx->net);
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nf_tables_loop_check_setelem;
+
+ set->ops->walk(ctx, set, &iter);
+ if (!iter.err)
+ iter.err = nft_set_catchall_loops(ctx, set);
+
+ if (iter.err < 0)
+ return iter.err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * nft_parse_u32_check - fetch u32 attribute and check for maximum value
+ *
+ * @attr: netlink attribute to fetch value from
+ * @max: maximum value allowed
+ * @dest: pointer to the destination variable
+ *
+ * Parse, check and store a given u32 netlink attribute into the
+ * destination variable. This function returns -ERANGE if the value
+ * exceeds the maximum. Otherwise 0 is returned and the attribute value
+ * is stored in the destination variable.
+ */
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+{
+ u32 val;
+
+ val = ntohl(nla_get_be32(attr));
+ if (val > max)
+ return -ERANGE;
+
+ *dest = val;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_u32_check);
+
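+/*
+ * Registers are tracked internally in 32 bit units: with the uapi sizes
+ * NFT_REG_SIZE == 16 and NFT_REG32_SIZE == 4, the legacy 128 bit register
+ * NFT_REG_1 maps to index 4 and thus aliases NFT_REG32_00..NFT_REG32_03;
+ * nft_dump_register() performs the reverse translation.
+ */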
+static int nft_parse_register(const struct nlattr *attr, u32 *preg)
+{
+ unsigned int reg;
+
+ reg = ntohl(nla_get_be32(attr));
+ switch (reg) {
+ case NFT_REG_VERDICT...NFT_REG_4:
+ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ break;
+ case NFT_REG32_00...NFT_REG32_15:
+ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+/**
+ * nft_dump_register - dump a register value to a netlink attribute
+ *
+ * @skb: socket buffer
+ * @attr: attribute number
+ * @reg: register number
+ *
+ * Construct a netlink attribute containing the register number. For
+ * compatibility reasons, register numbers that are a multiple of 4 are
+ * translated to the corresponding 128 bit register numbers.
+ */
+int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
+{
+ if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+ reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+ else
+ reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
+ return nla_put_be32(skb, attr, htonl(reg));
+}
+EXPORT_SYMBOL_GPL(nft_dump_register);
+
+static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
+{
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len > sizeof_field(struct nft_regs, data))
+ return -ERANGE;
+
+ return 0;
+}
+
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+{
+ u32 reg;
+ int err;
+
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_register_load(reg, len);
+ if (err < 0)
+ return err;
+
+ *sreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_load);
+
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len)
+{
+ int err;
+
+ switch (reg) {
+ case NFT_REG_VERDICT:
+ if (type != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ if (data != NULL &&
+ (data->verdict.code == NFT_GOTO ||
+ data->verdict.code == NFT_JUMP)) {
+ err = nf_tables_check_loops(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+ default:
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len >
+ sizeof_field(struct nft_regs, data))
+ return -ERANGE;
+
+ if (data != NULL && type != NFT_DATA_VALUE)
+ return -EINVAL;
+ return 0;
+ }
+}
+
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{
+ int err;
+ u32 reg;
+
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_register_store(ctx, reg, data, type, len);
+ if (err < 0)
+ return err;
+
+ *dreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_store);
+
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+ [NFTA_VERDICT_CODE] = { .type = NLA_U32 },
+ [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_VERDICT_CHAIN_ID] = { .type = NLA_U32 },
+};
+
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+ struct nft_chain *chain;
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla,
+ nft_verdict_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_VERDICT_CODE])
+ return -EINVAL;
+
+ /* zero padding hole for memcmp */
+ memset(data, 0, sizeof(*data));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict.code) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
+ case NFT_CONTINUE:
+ case NFT_BREAK:
+ case NFT_RETURN:
+ break;
+ case NFT_JUMP:
+ case NFT_GOTO:
+ if (tb[NFTA_VERDICT_CHAIN]) {
+ chain = nft_chain_lookup(ctx->net, ctx->table,
+ tb[NFTA_VERDICT_CHAIN],
+ genmask);
+ } else if (tb[NFTA_VERDICT_CHAIN_ID]) {
+ chain = nft_chain_lookup_byid(ctx->net, ctx->table,
+ tb[NFTA_VERDICT_CHAIN_ID],
+ genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ return -EINVAL;
+ }
+
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ if (nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+ if (nft_chain_is_bound(chain))
+ return -EINVAL;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
+ data->verdict.chain = chain;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ desc->len = sizeof(data->verdict);
+
+ return 0;
+}
+
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+ struct nft_chain *chain;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ nft_use_dec(&chain->use);
+ break;
+ }
+}
+
+int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, type);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
+ goto nla_put_failure;
+
+ switch (v->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
+ v->chain->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_value_init(const struct nft_ctx *ctx,
+ struct nft_data *data, struct nft_data_desc *desc,
+ const struct nlattr *nla)
+{
+ unsigned int len;
+
+ len = nla_len(nla);
+ if (len == 0)
+ return -EINVAL;
+ if (len > desc->size)
+ return -EOVERFLOW;
+ if (desc->len) {
+ if (len != desc->len)
+ return -EINVAL;
+ } else {
+ desc->len = len;
+ }
+
+ nla_memcpy(data->data, nla, len);
+
+ return 0;
+}
+
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+ unsigned int len)
+{
+ return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY },
+ [NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
+};
+
+/**
+ * nft_data_init - parse nf_tables data netlink attributes
+ *
+ * @ctx: context of the expression using the data
+ * @data: destination struct nft_data
+ * @desc: data description
+ * @nla: netlink attribute containing data
+ *
+ * Parse the netlink data attributes and initialize a struct nft_data.
+ * The type and length of data are returned in the data description.
+ *
+ * The caller can indicate that it only wants to accept data of type
+ * NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+{
+ struct nlattr *tb[NFTA_DATA_MAX + 1];
+ int err;
+
+ if (WARN_ON_ONCE(!desc->size))
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
+ nft_data_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DATA_VALUE]) {
+ if (desc->type != NFT_DATA_VALUE)
+ return -EINVAL;
+
+ err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) {
+ if (desc->type != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+ } else {
+ err = -EINVAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ * nft_data_release - release a nft_data item
+ *
+ * @data: struct nft_data to release
+ * @type: type of data
+ *
+ * Release an nft_data item. NFT_DATA_VALUE types can be silently discarded;
+ * all others need to be released by calling this function.
+ */
+void nft_data_release(const struct nft_data *data, enum nft_data_types type)
+{
+ if (type < NFT_DATA_VERDICT)
+ return;
+ switch (type) {
+ case NFT_DATA_VERDICT:
+ return nft_verdict_uninit(data);
+ default:
+ WARN_ON(1);
+ }
+}
+EXPORT_SYMBOL_GPL(nft_data_release);
+
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{
+ struct nlattr *nest;
+ int err;
+
+ nest = nla_nest_start_noflag(skb, attr);
+ if (nest == NULL)
+ return -1;
+
+ switch (type) {
+ case NFT_DATA_VALUE:
+ err = nft_value_dump(skb, data, len);
+ break;
+ case NFT_DATA_VERDICT:
+ err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
+ break;
+ default:
+ err = -EINVAL;
+ WARN_ON(1);
+ }
+
+ nla_nest_end(skb, nest);
+ return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+int __nft_release_basechain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule, *nr;
+
+ if (WARN_ON(!nft_is_base_chain(ctx->chain)))
+ return 0;
+
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+ nft_use_dec(&ctx->chain->use);
+ nf_tables_rule_release(ctx, rule);
+ }
+ nft_chain_del(ctx->chain);
+ nft_use_dec(&ctx->table->use);
+ nf_tables_chain_destroy(ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nft_release_basechain);
+
+static void __nft_release_hook(struct net *net, struct nft_table *table)
+{
+ struct nft_flowtable *flowtable;
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list)
+ __nf_tables_unregister_hook(net, table, chain, true);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
+ true);
+}
+
+static void __nft_release_hooks(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_table *table;
+
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (nft_table_has_owner(table))
+ continue;
+
+ __nft_release_hook(net, table);
+ }
+}
+
+static void __nft_release_table(struct net *net, struct nft_table *table)
+{
+ struct nft_flowtable *flowtable, *nf;
+ struct nft_chain *chain, *nc;
+ struct nft_object *obj, *ne;
+ struct nft_rule *rule, *nr;
+ struct nft_set *set, *ns;
+ struct nft_ctx ctx = {
+ .net = net,
+ .family = NFPROTO_NETDEV,
+ };
+
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+ if (nft_chain_binding(chain))
+ continue;
+
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+ nft_use_dec(&chain->use);
+ nf_tables_rule_release(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+ nft_use_dec(&table->use);
+ nf_tables_flowtable_destroy(flowtable);
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+ nft_use_dec(&table->use);
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
+ nft_set_destroy(&ctx, set);
+ }
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ nft_obj_del(obj);
+ nft_use_dec(&table->use);
+ nft_obj_destroy(&ctx, obj);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
+ nft_chain_del(chain);
+ nft_use_dec(&table->use);
+ nf_tables_chain_destroy(&ctx);
+ }
+ nf_tables_table_destroy(&ctx);
+}
+
+static void __nft_release_tables(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_table *table, *nt;
+
+ list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
+ if (nft_table_has_owner(table))
+ continue;
+
+ list_del(&table->list);
+
+ __nft_release_table(net, table);
+ }
+}
+
+static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct nft_table *table, *to_delete[8];
+ struct nftables_pernet *nft_net;
+ struct netlink_notify *n = ptr;
+ struct net *net = n->net;
+ unsigned int deleted;
+ bool restart = false;
+ unsigned int gc_seq;
+
+ if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
+ return NOTIFY_DONE;
+
+ nft_net = nft_pernet(net);
+ deleted = 0;
+ mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+ if (!list_empty(&nf_tables_destroy_list))
+ nf_tables_trans_destroy_flush_work();
+again:
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (nft_table_has_owner(table) &&
+ n->portid == table->nlpid) {
+ __nft_release_hook(net, table);
+ list_del_rcu(&table->list);
+ to_delete[deleted++] = table;
+ if (deleted >= ARRAY_SIZE(to_delete))
+ break;
+ }
+ }
+ if (deleted) {
+ restart = deleted >= ARRAY_SIZE(to_delete);
+ synchronize_rcu();
+ while (deleted)
+ __nft_release_table(net, to_delete[--deleted]);
+
+ if (restart)
+ goto again;
+ }
+ nft_gc_seq_end(nft_net, gc_seq);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nft_nl_notifier = {
+ .notifier_call = nft_rcv_nl_event,
+};
+
+static int __net_init nf_tables_init_net(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ INIT_LIST_HEAD(&nft_net->tables);
+ INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
+ INIT_LIST_HEAD(&nft_net->module_list);
+ INIT_LIST_HEAD(&nft_net->notify_list);
+ mutex_init(&nft_net->commit_mutex);
+ nft_net->base_seq = 1;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
+ nft_net->gc_seq = 0;
+
+ return 0;
+}
+
+static void __net_exit nf_tables_pre_exit_net(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ mutex_lock(&nft_net->commit_mutex);
+ __nft_release_hooks(net);
+ mutex_unlock(&nft_net->commit_mutex);
+}
+
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ unsigned int gc_seq;
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+ if (!list_empty(&nft_net->commit_list) ||
+ !list_empty(&nft_net->module_list))
+ __nf_tables_abort(net, NFNL_ABORT_NONE);
+
+ __nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
+ mutex_unlock(&nft_net->commit_mutex);
+ WARN_ON_ONCE(!list_empty(&nft_net->tables));
+ WARN_ON_ONCE(!list_empty(&nft_net->module_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
+}
+
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
+static struct pernet_operations nf_tables_net_ops = {
+ .init = nf_tables_init_net,
+ .pre_exit = nf_tables_pre_exit_net,
+ .exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
+ .id = &nf_tables_net_id,
+ .size = sizeof(struct nftables_pernet),
+};
+
+static int __init nf_tables_module_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&nf_tables_net_ops);
+ if (err < 0)
+ return err;
+
+ err = nft_chain_filter_init();
+ if (err < 0)
+ goto err_chain_filter;
+
+ err = nf_tables_core_module_init();
+ if (err < 0)
+ goto err_core_module;
+
+ err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
+ if (err < 0)
+ goto err_netdev_notifier;
+
+ err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
+ if (err < 0)
+ goto err_rht_objname;
+
+ err = nft_offload_init();
+ if (err < 0)
+ goto err_offload;
+
+ err = netlink_register_notifier(&nft_nl_notifier);
+ if (err < 0)
+ goto err_netlink_notifier;
+
+ /* must be last */
+ err = nfnetlink_subsys_register(&nf_tables_subsys);
+ if (err < 0)
+ goto err_nfnl_subsys;
+
+ nft_chain_route_init();
+
+ return err;
+
+err_nfnl_subsys:
+ netlink_unregister_notifier(&nft_nl_notifier);
+err_netlink_notifier:
+ nft_offload_exit();
+err_offload:
+ rhltable_destroy(&nft_objname_ht);
+err_rht_objname:
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+err_netdev_notifier:
+ nf_tables_core_module_exit();
+err_core_module:
+ nft_chain_filter_fini();
+err_chain_filter:
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ return err;
+}
+
+static void __exit nf_tables_module_exit(void)
+{
+ nfnetlink_subsys_unregister(&nf_tables_subsys);
+ netlink_unregister_notifier(&nft_nl_notifier);
+ nft_offload_exit();
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+ nft_chain_filter_fini();
+ nft_chain_route_fini();
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
+ cancel_work_sync(&trans_destroy_work);
+ rcu_barrier();
+ rhltable_destroy(&nft_objname_ht);
+ nf_tables_core_module_exit();
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 000000000..e0c117229
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/static_key.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/nft_meta.h>
+
+static noinline void __nft_trace_packet(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ enum nft_trace_types type)
+{
+ if (!info->trace || !info->nf_trace)
+ return;
+
+ info->chain = chain;
+ info->type = type;
+
+ nft_trace_notify(info);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_rule_dp *rule,
+ enum nft_trace_types type)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->nf_trace = pkt->skb->nf_trace;
+ info->rule = rule;
+ __nft_trace_packet(info, chain, type);
+ }
+}
+
+static inline void nft_trace_copy_nftrace(const struct nft_pktinfo *pkt,
+ struct nft_traceinfo *info)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ if (info->trace)
+ info->nf_trace = pkt->skb->nf_trace;
+ }
+}
+
+static void nft_bitwise_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
+
+ *dst = (*src & priv->mask) ^ priv->xor;
+}
+
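+/*
+ * Fast-path compare for values up to 32 bits: the masked register must
+ * equal the constant, with priv->inv flipping the match sense. On
+ * mismatch, NFT_BREAK ends evaluation of the current rule only.
+ */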
+static void nft_cmp_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+
+ if (((regs->data[priv->sreg] & priv->mask) == priv->data) ^ priv->inv)
+ return;
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_cmp16_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ const u64 *reg_data = (const u64 *)&regs->data[priv->sreg];
+ const u64 *mask = (const u64 *)&priv->mask;
+ const u64 *data = (const u64 *)&priv->data;
+
+ if (((reg_data[0] & mask[0]) == data[0] &&
+ ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
+ return;
+ regs->verdict.code = NFT_BREAK;
+}
+
+static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_regs *regs)
+{
+ enum nft_trace_types type;
+
+ switch (regs->verdict.code) {
+ case NFT_CONTINUE:
+ case NFT_RETURN:
+ type = NFT_TRACETYPE_RETURN;
+ break;
+ case NF_STOLEN:
+ type = NFT_TRACETYPE_RULE;
+ /* can't access skb->nf_trace; use copy */
+ break;
+ default:
+ type = NFT_TRACETYPE_RULE;
+
+ if (info->trace)
+ info->nf_trace = info->pkt->skb->nf_trace;
+ break;
+ }
+
+ __nft_trace_packet(info, chain, type);
+}
+
+static inline void nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_rule_dp *rule,
+ const struct nft_regs *regs)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_verdict(info, chain, regs);
+ }
+}
+
+static bool nft_payload_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ unsigned char *ptr;
+
+ if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
+ ptr = skb_network_header(skb);
+ else {
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ return false;
+ ptr = skb->data + nft_thoff(pkt);
+ }
+
+ ptr += priv->offset;
+
+ if (unlikely(ptr + priv->len > skb_tail_pointer(skb)))
+ return false;
+
+ *dest = 0;
+ if (priv->len == 2)
+ *(u16 *)dest = *(u16 *)ptr;
+ else if (priv->len == 4)
+ *(u32 *)dest = *(u32 *)ptr;
+ else
+ *(u8 *)dest = *(u8 *)ptr;
+ return true;
+}
+
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_base_chain *base_chain;
+ struct nft_stats __percpu *pstats;
+ struct nft_stats *stats;
+
+ base_chain = nft_base_chain(chain);
+
+ pstats = READ_ONCE(base_chain->stats);
+ if (pstats) {
+ local_bh_disable();
+ stats = this_cpu_ptr(pstats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
+ }
+}
+
+struct nft_jumpstack {
+ const struct nft_chain *chain;
+ const struct nft_rule_dp *rule;
+ const struct nft_rule_dp *last_rule;
+};
+
+static void expr_call_ops_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ struct nft_pktinfo *pkt)
+{
+#ifdef CONFIG_RETPOLINE
+ unsigned long e = (unsigned long)expr->ops->eval;
+#define X(e, fun) \
+ do { if ((e) == (unsigned long)(fun)) \
+ return fun(expr, regs, pkt); } while (0)
+
+ X(e, nft_payload_eval);
+ X(e, nft_cmp_eval);
+ X(e, nft_counter_eval);
+ X(e, nft_meta_get_eval);
+ X(e, nft_lookup_eval);
+ X(e, nft_range_eval);
+ X(e, nft_immediate_eval);
+ X(e, nft_byteorder_eval);
+ X(e, nft_dynset_eval);
+ X(e, nft_rt_get_eval);
+ X(e, nft_bitwise_eval);
+#undef X
+#endif /* CONFIG_RETPOLINE */
+ expr->ops->eval(expr, regs, pkt);
+}
+
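+/*
+ * Rules in the data path live in a flat per-generation blob: each
+ * nft_rule_dp header is followed by rule->dlen bytes of expressions,
+ * packed back to back, so these helpers advance by object size.
+ */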
+#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0]
+#define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size
+#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen]
+#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen
+
+#define nft_rule_dp_for_each_expr(expr, last, rule) \
+ for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \
+ (expr) != (last); \
+ (expr) = nft_rule_expr_next(expr))
+
+unsigned int
+nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+{
+ const struct nft_chain *chain = priv, *basechain = chain;
+ const struct nft_rule_dp *rule, *last_rule;
+ const struct net *net = nft_net(pkt);
+ const struct nft_expr *expr, *last;
+ struct nft_regs regs = {};
+ unsigned int stackptr = 0;
+ struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+ bool genbit = READ_ONCE(net->nft.gencursor);
+ struct nft_rule_blob *blob;
+ struct nft_traceinfo info;
+
+ info.trace = false;
+ if (static_branch_unlikely(&nft_trace_enabled))
+ nft_trace_init(&info, pkt, &regs.verdict, basechain);
+do_chain:
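+ /* The gencursor bit selects which of the two rule blobs is live. */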
+ if (genbit)
+ blob = rcu_dereference(chain->blob_gen_1);
+ else
+ blob = rcu_dereference(chain->blob_gen_0);
+
+ rule = (struct nft_rule_dp *)blob->data;
+ last_rule = (void *)blob->data + blob->size;
+next_rule:
+ regs.verdict.code = NFT_CONTINUE;
+ for (; rule < last_rule; rule = nft_rule_next(rule)) {
+ nft_rule_dp_for_each_expr(expr, last, rule) {
+ if (expr->ops == &nft_cmp_fast_ops)
+ nft_cmp_fast_eval(expr, &regs);
+ else if (expr->ops == &nft_cmp16_fast_ops)
+ nft_cmp16_fast_eval(expr, &regs);
+ else if (expr->ops == &nft_bitwise_fast_ops)
+ nft_bitwise_fast_eval(expr, &regs);
+ else if (expr->ops != &nft_payload_fast_ops ||
+ !nft_payload_fast_eval(expr, &regs, pkt))
+ expr_call_ops_eval(expr, &regs, pkt);
+
+ if (regs.verdict.code != NFT_CONTINUE)
+ break;
+ }
+
+ switch (regs.verdict.code) {
+ case NFT_BREAK:
+ regs.verdict.code = NFT_CONTINUE;
+ nft_trace_copy_nftrace(pkt, &info);
+ continue;
+ case NFT_CONTINUE:
+ nft_trace_packet(pkt, &info, chain, rule,
+ NFT_TRACETYPE_RULE);
+ continue;
+ }
+ break;
+ }
+
+ nft_trace_verdict(&info, chain, rule, &regs);
+
+ switch (regs.verdict.code & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ case NF_STOLEN:
+ return regs.verdict.code;
+ }
+
+ switch (regs.verdict.code) {
+ case NFT_JUMP:
+ if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
+ return NF_DROP;
+ jumpstack[stackptr].chain = chain;
+ jumpstack[stackptr].rule = nft_rule_next(rule);
+ jumpstack[stackptr].last_rule = last_rule;
+ stackptr++;
+ fallthrough;
+ case NFT_GOTO:
+ chain = regs.verdict.chain;
+ goto do_chain;
+ case NFT_CONTINUE:
+ case NFT_RETURN:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ if (stackptr > 0) {
+ stackptr--;
+ chain = jumpstack[stackptr].chain;
+ rule = jumpstack[stackptr].rule;
+ last_rule = jumpstack[stackptr].last_rule;
+ goto next_rule;
+ }
+
+ nft_trace_packet(pkt, &info, basechain, NULL, NFT_TRACETYPE_POLICY);
+
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
+
+ return nft_base_chain(basechain)->policy;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain);
+
+static struct nft_expr_type *nft_basic_types[] = {
+ &nft_imm_type,
+ &nft_cmp_type,
+ &nft_lookup_type,
+ &nft_bitwise_type,
+ &nft_byteorder_type,
+ &nft_payload_type,
+ &nft_dynset_type,
+ &nft_range_type,
+ &nft_meta_type,
+ &nft_rt_type,
+ &nft_exthdr_type,
+ &nft_last_type,
+ &nft_counter_type,
+};
+
+static struct nft_object_type *nft_basic_objects[] = {
+#ifdef CONFIG_NETWORK_SECMARK
+ &nft_secmark_obj_type,
+#endif
+ &nft_counter_obj_type,
+};
+
+int __init nf_tables_core_module_init(void)
+{
+ int err, i, j = 0;
+
+ nft_counter_init_seqcount();
+
+ for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
+ err = nft_register_obj(nft_basic_objects[i]);
+ if (err)
+ goto err;
+ }
+
+ for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) {
+ err = nft_register_expr(nft_basic_types[j]);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ while (j-- > 0)
+ nft_unregister_expr(nft_basic_types[j]);
+
+ while (i-- > 0)
+ nft_unregister_obj(nft_basic_objects[i]);
+
+ return err;
+}
+
+void nf_tables_core_module_exit(void)
+{
+ int i;
+
+ i = ARRAY_SIZE(nft_basic_types);
+ while (i-- > 0)
+ nft_unregister_expr(nft_basic_types[i]);
+
+ i = ARRAY_SIZE(nft_basic_objects);
+ while (i-- > 0)
+ nft_unregister_obj(nft_basic_objects[i]);
+}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
new file mode 100644
index 000000000..910ef881c
--- /dev/null
+++ b/net/netfilter/nf_tables_offload.c
@@ -0,0 +1,691 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/pkt_cls.h>
+
+static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
+{
+ struct nft_flow_rule *flow;
+
+ flow = kzalloc(sizeof(struct nft_flow_rule), GFP_KERNEL);
+ if (!flow)
+ return NULL;
+
+ flow->rule = flow_rule_alloc(num_actions);
+ if (!flow->rule) {
+ kfree(flow);
+ return NULL;
+ }
+
+ flow->rule->match.dissector = &flow->match.dissector;
+ flow->rule->match.mask = &flow->match.mask;
+ flow->rule->match.key = &flow->match.key;
+
+ return flow;
+}
+
+void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+ enum flow_dissector_key_id addr_type)
+{
+ struct nft_flow_match *match = &flow->match;
+ struct nft_flow_key *mask = &match->mask;
+ struct nft_flow_key *key = &match->key;
+
+ if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+ return;
+
+ key->control.addr_type = addr_type;
+ mask->control.addr_type = 0xffff;
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+ match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
+ offsetof(struct nft_flow_key, control);
+}
+
+struct nft_offload_ethertype {
+ __be16 value;
+ __be16 mask;
+};
+
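+/*
+ * The flow dissector expects the outermost ethertype in basic.n_proto,
+ * while the nft expressions left it in the vlan key: rotate the fields
+ * accordingly, shifting vlan into cvlan for QinQ.
+ */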
+static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow)
+{
+ struct nft_flow_match *match = &flow->match;
+ struct nft_offload_ethertype ethertype = {
+ .value = match->key.basic.n_proto,
+ .mask = match->mask.basic.n_proto,
+ };
+
+ if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
+ match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
+ match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
+ match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid;
+ match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid;
+ match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid;
+ match->key.vlan.vlan_tpid = ethertype.value;
+ match->mask.vlan.vlan_tpid = ethertype.mask;
+ match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
+ offsetof(struct nft_flow_key, cvlan);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
+ } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ (match->key.basic.n_proto == htons(ETH_P_8021Q) ||
+ match->key.basic.n_proto == htons(ETH_P_8021AD))) {
+ match->key.basic.n_proto = match->key.vlan.vlan_tpid;
+ match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
+ match->key.vlan.vlan_tpid = ethertype.value;
+ match->mask.vlan.vlan_tpid = ethertype.mask;
+ match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+ offsetof(struct nft_flow_key, vlan);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ }
+}
+
+struct nft_flow_rule *nft_flow_rule_create(struct net *net,
+ const struct nft_rule *rule)
+{
+ struct nft_offload_ctx *ctx;
+ struct nft_flow_rule *flow;
+ int num_actions = 0, err;
+ struct nft_expr *expr;
+
+ expr = nft_expr_first(rule);
+ while (nft_expr_more(rule, expr)) {
+ if (expr->ops->offload_action &&
+ expr->ops->offload_action(expr))
+ num_actions++;
+
+ expr = nft_expr_next(expr);
+ }
+
+ if (num_actions == 0)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ flow = nft_flow_rule_alloc(num_actions);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ expr = nft_expr_first(rule);
+
+ ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ ctx->net = net;
+ ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+
+ while (nft_expr_more(rule, expr)) {
+ if (!expr->ops->offload) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ err = expr->ops->offload(ctx, flow, expr);
+ if (err < 0)
+ goto err_out;
+
+ expr = nft_expr_next(expr);
+ }
+ nft_flow_rule_transfer_vlan(ctx, flow);
+
+ flow->proto = ctx->dep.l3num;
+ kfree(ctx);
+
+ return flow;
+err_out:
+ kfree(ctx);
+ nft_flow_rule_destroy(flow);
+
+ return ERR_PTR(err);
+}
+
+void nft_flow_rule_destroy(struct nft_flow_rule *flow)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ flow_action_for_each(i, entry, &flow->rule->action) {
+ switch (entry->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_MIRRED:
+ dev_put(entry->dev);
+ break;
+ default:
+ break;
+ }
+ }
+ kfree(flow->rule);
+ kfree(flow);
+}
+
+void nft_offload_set_dependency(struct nft_offload_ctx *ctx,
+ enum nft_offload_dep_type type)
+{
+ ctx->dep.type = type;
+}
+
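+/*
+ * A cmp on a protocol field reports its constant here: the stored l3num
+ * later becomes the flow rule's protocol, and the pending dependency is
+ * reset once it has been consumed.
+ */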
+void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
+ const void *data, u32 len)
+{
+ switch (ctx->dep.type) {
+ case NFT_OFFLOAD_DEP_NETWORK:
+ WARN_ON(len != sizeof(__u16));
+ memcpy(&ctx->dep.l3num, data, sizeof(__u16));
+ break;
+ case NFT_OFFLOAD_DEP_TRANSPORT:
+ WARN_ON(len != sizeof(__u8));
+ memcpy(&ctx->dep.protonum, data, sizeof(__u8));
+ break;
+ default:
+ break;
+ }
+ ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+}
+
+static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
+ __be16 proto, int priority,
+ struct netlink_ext_ack *extack)
+{
+ common->protocol = proto;
+ common->prio = priority;
+ common->extack = extack;
+}
+
+static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+ struct list_head *cb_list)
+{
+ struct flow_block_cb *block_cb;
+ int err;
+
+ list_for_each_entry(block_cb, cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
+{
+ if (basechain->ops.priority <= 0 ||
+ basechain->ops.priority > USHRT_MAX)
+ return -1;
+
+ return 0;
+}
+
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
+static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+ const struct nft_base_chain *basechain,
+ const struct nft_rule *rule,
+ const struct nft_flow_rule *flow,
+ struct netlink_ext_ack *extack,
+ enum flow_cls_command command)
+{
+ __be16 proto = ETH_P_ALL;
+
+ memset(cls_flow, 0, sizeof(*cls_flow));
+
+ if (flow)
+ proto = flow->proto;
+
+ nft_flow_offload_common_init(&cls_flow->common, proto,
+ basechain->ops.priority, extack);
+ cls_flow->command = command;
+ cls_flow->cookie = (unsigned long) rule;
+ if (flow)
+ cls_flow->rule = flow->rule;
+}
+
+static int nft_flow_offload_cmd(const struct nft_chain *chain,
+ const struct nft_rule *rule,
+ struct nft_flow_rule *flow,
+ enum flow_cls_command command,
+ struct flow_cls_offload *cls_flow)
+{
+ struct netlink_ext_ack extack = {};
+ struct nft_base_chain *basechain;
+
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ basechain = nft_base_chain(chain);
+ nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack,
+ command);
+
+ return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow,
+ &basechain->flow_block.cb_list);
+}
+
+static int nft_flow_offload_rule(const struct nft_chain *chain,
+ struct nft_rule *rule,
+ struct nft_flow_rule *flow,
+ enum flow_cls_command command)
+{
+ struct flow_cls_offload cls_flow;
+
+ return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow);
+}
+
+int nft_flow_rule_stats(const struct nft_chain *chain,
+ const struct nft_rule *rule)
+{
+ struct flow_cls_offload cls_flow = {};
+ struct nft_expr *expr, *next;
+ int err;
+
+ err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS,
+ &cls_flow);
+ if (err < 0)
+ return err;
+
+ nft_rule_for_each_expr(expr, next, rule) {
+ if (expr->ops->offload_stats)
+ expr->ops->offload_stats(expr, &cls_flow.stats);
+ }
+
+ return 0;
+}
+
+static int nft_flow_offload_bind(struct flow_block_offload *bo,
+ struct nft_base_chain *basechain)
+{
+ list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
+ return 0;
+}
+
+static int nft_flow_offload_unbind(struct flow_block_offload *bo,
+ struct nft_base_chain *basechain)
+{
+ struct flow_block_cb *block_cb, *next;
+ struct flow_cls_offload cls_flow;
+ struct netlink_ext_ack extack;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ chain = &basechain->chain;
+ list_for_each_entry(rule, &chain->rules, list) {
+ memset(&extack, 0, sizeof(extack));
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
+ &extack, FLOW_CLS_DESTROY);
+ nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
+ }
+
+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+
+ return 0;
+}
+
+static int nft_block_setup(struct nft_base_chain *basechain,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ err = nft_flow_offload_bind(bo, basechain);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ err = nft_flow_offload_unbind(bo, basechain);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static void nft_flow_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nft_base_chain *basechain,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &basechain->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ bo->cb_list_head = &basechain->flow_block.cb_list;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
+ int err;
+
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ if (err < 0)
+ return err;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
+{
+ struct nft_base_chain *basechain = block_cb->indr.data;
+ struct net_device *dev = block_cb->indr.dev;
+ struct netlink_ext_ack extack = {};
+ struct nftables_pernet *nft_net;
+ struct net *net = dev_net(dev);
+ struct flow_block_offload bo;
+
+ nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
+ basechain, &extack);
+ nft_net = nft_pernet(net);
+ mutex_lock(&nft_net->commit_mutex);
+ list_del(&block_cb->driver_list);
+ list_move(&block_cb->list, &bo.cb_list);
+ nft_flow_offload_unbind(&bo, basechain);
+ mutex_unlock(&nft_net->commit_mutex);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
+ int err;
+
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack);
+
+ err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo,
+ nft_indr_block_cleanup);
+ if (err < 0)
+ return err;
+
+ if (list_empty(&bo.cb_list))
+ return -EOPNOTSUPP;
+
+ return nft_block_setup(basechain, &bo, cmd);
+}
+
+static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ err = nft_indr_block_offload_cmd(basechain, dev, cmd);
+
+ return err;
+}
+
+static int nft_flow_block_chain(struct nft_base_chain *basechain,
+ const struct net_device *this_dev,
+ enum flow_block_command cmd)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ dev = hook->ops.dev;
+ if (this_dev && this_dev != dev)
+ continue;
+
+ err = nft_chain_offload_cmd(basechain, dev, cmd);
+ if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+ if (!this_dev)
+ goto err_flow_block;
+
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+
+err_flow_block:
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ dev = hook->ops.dev;
+ nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+ }
+ return err;
+}
+
+static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
+ enum flow_block_command cmd)
+{
+ struct nft_base_chain *basechain;
+ u8 policy;
+
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ basechain = nft_base_chain(chain);
+ policy = ppolicy ? *ppolicy : basechain->policy;
+
+ /* Only the default accept policy is supported for now. */
+ if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
+ return -EOPNOTSUPP;
+
+ return nft_flow_block_chain(basechain, NULL, cmd);
+}
+
+static void nft_flow_rule_offload_abort(struct net *net,
+ struct nft_trans *trans)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ int err = 0;
+
+ list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ nft_trans_chain_update(trans))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_UNBIND);
+ break;
+ case NFT_MSG_DELCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_BIND);
+ break;
+ case NFT_MSG_NEWRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ NULL, FLOW_CLS_DESTROY);
+ break;
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_REPLACE);
+ break;
+ }
+
+ if (WARN_ON_ONCE(err))
+ break;
+ }
+}
+
+int nft_flow_rule_offload_commit(struct net *net)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans;
+ int err = 0;
+ u8 policy;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ nft_trans_chain_update(trans))
+ continue;
+
+ policy = nft_trans_chain_policy(trans);
+ err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ FLOW_BLOCK_BIND);
+ break;
+ case NFT_MSG_DELCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ policy = nft_trans_chain_policy(trans);
+ err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ FLOW_BLOCK_UNBIND);
+ break;
+ case NFT_MSG_NEWRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ if (trans->ctx.flags & NLM_F_REPLACE ||
+ !(trans->ctx.flags & NLM_F_APPEND)) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_REPLACE);
+ break;
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ NULL, FLOW_CLS_DESTROY);
+ break;
+ }
+
+ if (err) {
+ nft_flow_rule_offload_abort(net, trans);
+ break;
+ }
+ }
+
+ return err;
+}
+
+static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net,
+ struct net_device *dev)
+{
+ struct nft_base_chain *basechain;
+ struct nft_hook *hook, *found;
+ const struct nft_table *table;
+ struct nft_chain *chain;
+
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (table->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_base_chain(chain) ||
+ !(chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ found = NULL;
+ basechain = nft_base_chain(chain);
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ found = hook;
+ break;
+ }
+ if (!found)
+ continue;
+
+ return chain;
+ }
+ }
+
+ return NULL;
+}
+
+static int nft_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nftables_pernet *nft_net;
+ struct net *net = dev_net(dev);
+ struct nft_chain *chain;
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ nft_net = nft_pernet(net);
+ mutex_lock(&nft_net->commit_mutex);
+ chain = __nft_offload_get_chain(nft_net, dev);
+ if (chain)
+ nft_flow_block_chain(nft_base_chain(chain), dev,
+ FLOW_BLOCK_UNBIND);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nft_offload_netdev_notifier = {
+ .notifier_call = nft_offload_netdev_event,
+};
+
+int nft_offload_init(void)
+{
+ return register_netdevice_notifier(&nft_offload_netdev_notifier);
+}
+
+void nft_offload_exit(void)
+{
+ unregister_netdevice_notifier(&nft_offload_netdev_notifier);
+}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
new file mode 100644
index 000000000..1163ba9c1
--- /dev/null
+++ b/net/netfilter/nf_tables_trace.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2015 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ */
+
+#include <linux/module.h>
+#include <linux/static_key.h>
+#include <linux/hash.h>
+#include <linux/siphash.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_TRACETYPE_LL_HSIZE 20
+#define NFT_TRACETYPE_NETWORK_HSIZE 40
+#define NFT_TRACETYPE_TRANSPORT_HSIZE 20
+
+DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
+EXPORT_SYMBOL_GPL(nft_trace_enabled);
+
+static int trace_fill_header(struct sk_buff *nlskb, u16 type,
+ const struct sk_buff *skb,
+ int off, unsigned int len)
+{
+ struct nlattr *nla;
+
+ if (len == 0)
+ return 0;
+
+ nla = nla_reserve(nlskb, type, len);
+ if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
+ return -1;
+
+ return 0;
+}
+
+static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
+ const struct sk_buff *skb)
+{
+ struct vlan_ethhdr veth;
+ int off;
+
+ BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
+
+ off = skb_mac_header(skb) - skb->data;
+ if (off != -ETH_HLEN)
+ return -1;
+
+ if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
+ return -1;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
+}
+
+static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
+ const struct net_device *indev,
+ const struct net_device *outdev)
+{
+ if (indev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
+ htonl(indev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
+ htons(indev->type)))
+ return -1;
+ }
+
+ if (outdev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
+ htonl(outdev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
+ htons(outdev->type)))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
+ const struct nft_pktinfo *pkt)
+{
+ const struct sk_buff *skb = pkt->skb;
+ int off = skb_network_offset(skb);
+ unsigned int len, nh_end;
+
+ nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
+ len = min_t(unsigned int, nh_end - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
+ if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
+ return -1;
+
+ if (pkt->flags & NFT_PKTINFO_L4PROTO) {
+ len = min_t(unsigned int, skb->len - nft_thoff(pkt),
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+ nft_thoff(pkt), len))
+ return -1;
+ }
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_get(skb))
+ return nf_trace_fill_ll_header(nlskb, skb);
+
+ off = skb_mac_header(skb) - skb->data;
+ len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
+ return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
+ skb, off, len);
+}
+
+static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
+ const struct nft_traceinfo *info)
+{
+ if (!info->rule || info->rule->is_last)
+ return 0;
+
+ /* a continue verdict with ->type == RETURN means that this is
+ * an implicit return (end of chain reached).
+ *
+ * Since no rule matched, the ->rule pointer is invalid.
+ */
+ if (info->type == NFT_TRACETYPE_RETURN &&
+ info->verdict->code == NFT_CONTINUE)
+ return 0;
+
+ return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
+ cpu_to_be64(info->rule->handle),
+ NFTA_TRACE_PAD);
+}
+
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+ switch (info->type) {
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (info->verdict->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+void nft_trace_notify(struct nft_traceinfo *info)
+{
+ const struct nft_pktinfo *pkt = info->pkt;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ unsigned int size;
+ u32 mark = 0;
+ u16 event;
+
+ if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
+ return;
+
+ size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
+ nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
+ nla_total_size(sizeof(__be32)) + /* trace type */
+ nla_total_size(0) + /* VERDICT, nested */
+ nla_total_size(sizeof(u32)) + /* verdict code */
+ nla_total_size(sizeof(u32)) + /* id */
+ nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
+ nla_total_size(sizeof(u32)) + /* iif */
+ nla_total_size(sizeof(__be16)) + /* iiftype */
+ nla_total_size(sizeof(u32)) + /* oif */
+ nla_total_size(sizeof(__be16)) + /* oiftype */
+ nla_total_size(sizeof(u32)) + /* mark */
+ nla_total_size(sizeof(u32)) + /* nfproto */
+ nla_total_size(sizeof(u32)); /* policy */
+
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
+ skb = nlmsg_new(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
+ nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
+
+ if (nf_trace_fill_rule_info(skb, info))
+ goto nla_put_failure;
+
+ switch (info->type) {
+ case NFT_TRACETYPE_UNSPEC:
+ case __NFT_TRACETYPE_MAX:
+ break;
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
+ goto nla_put_failure;
+
+ /* pkt->skb undefined iff NF_STOLEN, disable dump */
+ if (info->verdict->code == NF_STOLEN)
+ info->packet_dumped = true;
+ else
+ mark = pkt->skb->mark;
+
+ break;
+ case NFT_TRACETYPE_POLICY:
+ mark = pkt->skb->mark;
+
+ if (nla_put_be32(skb, NFTA_TRACE_POLICY,
+ htonl(info->basechain->policy)))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
+ goto nla_put_failure;
+
+ if (!info->packet_dumped) {
+ if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
+ goto nla_put_failure;
+
+ if (nf_trace_fill_pkt_info(skb, pkt))
+ goto nla_put_failure;
+ info->packet_dumped = true;
+ }
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+ return;
+
+ nla_put_failure:
+ WARN_ON_ONCE(1);
+ kfree_skb(skb);
+}
+
+void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_chain *chain)
+{
+ static siphash_key_t trace_key __read_mostly;
+ struct sk_buff *skb = pkt->skb;
+
+ info->basechain = nft_base_chain(chain);
+ info->trace = true;
+ info->nf_trace = pkt->skb->nf_trace;
+ info->packet_dumped = false;
+ info->pkt = pkt;
+ info->verdict = verdict;
+
+ net_get_random_once(&trace_key, sizeof(trace_key));
+
+ info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
+ skb_get_hash(skb),
+ skb->skb_iif,
+ &trace_key);
+}
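+
+ /* A minimal userspace sketch of a consumer for the trace events emitted
+ * by nft_trace_notify() above; an illustration under assumptions (plain
+ * libc plus the uapi headers), not part of this file. It joins the
+ * NFNLGRP_NFTRACE multicast group on a NETLINK_NETFILTER socket and walks
+ * the received nlmsghdr frames; attribute parsing and error handling are
+ * omitted.
+ *
+ * #include <unistd.h>
+ * #include <sys/socket.h>
+ * #include <linux/netlink.h>
+ * #include <linux/netfilter/nfnetlink.h>
+ *
+ * int trace_listen(void)
+ * {
+ * unsigned int grp = NFNLGRP_NFTRACE;
+ * char buf[8192];
+ * ssize_t n;
+ * int fd;
+ *
+ * fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
+ * if (fd < 0)
+ * return -1;
+ * if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ * &grp, sizeof(grp)) < 0)
+ * return -1;
+ * while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
+ * struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
+ *
+ * while (NLMSG_OK(nlh, n))
+ * nlh = NLMSG_NEXT(nlh, n); // each is an NFT_MSG_TRACE
+ * }
+ * close(fd);
+ * return 0;
+ * }
+ */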
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
new file mode 100644
index 000000000..c9fbe0f70
--- /dev/null
+++ b/net/netfilter/nfnetlink.c
@@ -0,0 +1,811 @@
+/* Netfilter messages via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>,
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2005-2017 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Initial netfilter messages via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/uaccess.h>
+#include <net/sock.h>
+#include <linux/init.h>
+#include <linux/sched/signal.h>
+
+#include <net/netlink.h>
+#include <net/netns/generic.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+MODULE_DESCRIPTION("Netfilter messages via netlink socket");
+
+#define nfnl_dereference_protected(id) \
+ rcu_dereference_protected(table[(id)].subsys, \
+ lockdep_nfnl_is_held((id)))
+
+#define NFNL_MAX_ATTR_COUNT 32
+
+static unsigned int nfnetlink_pernet_id __read_mostly;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static DEFINE_SPINLOCK(nfnl_grp_active_lock);
+#endif
+
+struct nfnl_net {
+ struct sock *nfnl;
+};
+
+static struct {
+ struct mutex mutex;
+ const struct nfnetlink_subsystem __rcu *subsys;
+} table[NFNL_SUBSYS_COUNT];
+
+static struct lock_class_key nfnl_lockdep_keys[NFNL_SUBSYS_COUNT];
+
+static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = {
+ [NFNL_SUBSYS_NONE] = "nfnl_subsys_none",
+ [NFNL_SUBSYS_CTNETLINK] = "nfnl_subsys_ctnetlink",
+ [NFNL_SUBSYS_CTNETLINK_EXP] = "nfnl_subsys_ctnetlink_exp",
+ [NFNL_SUBSYS_QUEUE] = "nfnl_subsys_queue",
+ [NFNL_SUBSYS_ULOG] = "nfnl_subsys_ulog",
+ [NFNL_SUBSYS_OSF] = "nfnl_subsys_osf",
+ [NFNL_SUBSYS_IPSET] = "nfnl_subsys_ipset",
+ [NFNL_SUBSYS_ACCT] = "nfnl_subsys_acct",
+ [NFNL_SUBSYS_CTNETLINK_TIMEOUT] = "nfnl_subsys_cttimeout",
+ [NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper",
+ [NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables",
+ [NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat",
+ [NFNL_SUBSYS_HOOK] = "nfnl_subsys_hook",
+};
+
+static const int nfnl_group2type[NFNLGRP_MAX+1] = {
+ [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK,
+ [NFNLGRP_CONNTRACK_UPDATE] = NFNL_SUBSYS_CTNETLINK,
+ [NFNLGRP_CONNTRACK_DESTROY] = NFNL_SUBSYS_CTNETLINK,
+ [NFNLGRP_CONNTRACK_EXP_NEW] = NFNL_SUBSYS_CTNETLINK_EXP,
+ [NFNLGRP_CONNTRACK_EXP_UPDATE] = NFNL_SUBSYS_CTNETLINK_EXP,
+ [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
+ [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
+ [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
+ [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
+};
+
+static struct nfnl_net *nfnl_pernet(struct net *net)
+{
+ return net_generic(net, nfnetlink_pernet_id);
+}
+
+void nfnl_lock(__u8 subsys_id)
+{
+ mutex_lock(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(nfnl_lock);
+
+void nfnl_unlock(__u8 subsys_id)
+{
+ mutex_unlock(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(nfnl_unlock);
+
+#ifdef CONFIG_PROVE_LOCKING
+bool lockdep_nfnl_is_held(u8 subsys_id)
+{
+ return lockdep_is_held(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
+#endif
+
+int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
+{
+ u8 cb_id;
+
+ /* Sanity-check attr_count size to avoid stack buffer overflow. */
+ for (cb_id = 0; cb_id < n->cb_count; cb_id++)
+ if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT))
+ return -EINVAL;
+
+ nfnl_lock(n->subsys_id);
+ if (table[n->subsys_id].subsys) {
+ nfnl_unlock(n->subsys_id);
+ return -EBUSY;
+ }
+ rcu_assign_pointer(table[n->subsys_id].subsys, n);
+ nfnl_unlock(n->subsys_id);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
+
+int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
+{
+ nfnl_lock(n->subsys_id);
+ table[n->subsys_id].subsys = NULL;
+ nfnl_unlock(n->subsys_id);
+ synchronize_rcu();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
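+
+ /* A minimal sketch of the registration pattern; the foo_* names are
+ * assumptions for illustration. A complete real user of this API is the
+ * nfnetlink_acct subsystem added later in this patch.
+ *
+ * static int foo_msg_get(struct sk_buff *skb, const struct nfnl_info *info,
+ * const struct nlattr * const cda[])
+ * {
+ * return 0; // handle one request of message type 0
+ * }
+ *
+ * static const struct nfnl_callback foo_cb[] = {
+ * [0] = {
+ * .call = foo_msg_get,
+ * .type = NFNL_CB_RCU,
+ * .attr_count = 0,
+ * },
+ * };
+ *
+ * static const struct nfnetlink_subsystem foo_subsys = {
+ * .name = "foo",
+ * .subsys_id = NFNL_SUBSYS_NONE, // a real id from nfnetlink.h
+ * .cb_count = ARRAY_SIZE(foo_cb),
+ * .cb = foo_cb,
+ * };
+ *
+ * // module init/exit would then call
+ * // nfnetlink_subsys_register(&foo_subsys) and
+ * // nfnetlink_subsys_unregister(&foo_subsys).
+ */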
+
+static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u16 type)
+{
+ u8 subsys_id = NFNL_SUBSYS_ID(type);
+
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
+ return NULL;
+
+ return rcu_dereference(table[subsys_id].subsys);
+}
+
+static inline const struct nfnl_callback *
+nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss)
+{
+ u8 cb_id = NFNL_MSG_TYPE(type);
+
+ if (cb_id >= ss->cb_count)
+ return NULL;
+
+ return &ss->cb[cb_id];
+}
+
+int nfnetlink_has_listeners(struct net *net, unsigned int group)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ return netlink_has_listeners(nfnlnet->nfnl, group);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
+
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
+ unsigned int group, int echo, gfp_t flags)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ return nlmsg_notify(nfnlnet->nfnl, skb, portid, group, echo, flags);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_send);
+
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ return netlink_set_err(nfnlnet->nfnl, portid, group, error);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_set_err);
+
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+ int err;
+
+ err = nlmsg_unicast(nfnlnet->nfnl, skb, portid);
+ if (err == -EAGAIN)
+ err = -ENOBUFS;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
+
+void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid,
+ __u32 group, gfp_t allocation)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ netlink_broadcast(nfnlnet->nfnl, skb, portid, group, allocation);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_broadcast);
+
+/* Process one complete nfnetlink message. */
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ const struct nfnl_callback *nc;
+ const struct nfnetlink_subsystem *ss;
+ int type, err;
+
+ /* All messages must contain at least an nfgenmsg header */
+ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
+ return 0;
+
+ type = nlh->nlmsg_type;
+replay:
+ rcu_read_lock();
+
+ ss = nfnetlink_get_subsys(type);
+ if (!ss) {
+#ifdef CONFIG_MODULES
+ rcu_read_unlock();
+ request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
+ rcu_read_lock();
+ ss = nfnetlink_get_subsys(type);
+ if (!ss)
+#endif
+ {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ }
+
+ nc = nfnetlink_find_client(type, ss);
+ if (!nc) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ {
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+ u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ int attrlen = nlh->nlmsg_len - min_len;
+ __u8 subsys_id = NFNL_SUBSYS_ID(type);
+ struct nfnl_info info = {
+ .net = net,
+ .sk = nfnlnet->nfnl,
+ .nlh = nlh,
+ .nfmsg = nlmsg_data(nlh),
+ .extack = extack,
+ };
+
+ /* Sanity-check NFNL_MAX_ATTR_COUNT */
+ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
+ rcu_read_unlock();
+ return -ENOMEM;
+ }
+
+ err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count,
+ attr, attrlen,
+ ss->cb[cb_id].policy, extack);
+ if (err < 0) {
+ rcu_read_unlock();
+ return err;
+ }
+
+ if (!nc->call) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ switch (nc->type) {
+ case NFNL_CB_RCU:
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
+ rcu_read_unlock();
+ break;
+ case NFNL_CB_MUTEX:
+ rcu_read_unlock();
+ nfnl_lock(subsys_id);
+ if (nfnl_dereference_protected(subsys_id) != ss ||
+ nfnetlink_find_client(type, ss) != nc) {
+ nfnl_unlock(subsys_id);
+ err = -EAGAIN;
+ break;
+ }
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
+ nfnl_unlock(subsys_id);
+ break;
+ default:
+ rcu_read_unlock();
+ err = -EINVAL;
+ break;
+ }
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
+ }
+}
+
+struct nfnl_err {
+ struct list_head head;
+ struct nlmsghdr *nlh;
+ int err;
+ struct netlink_ext_ack extack;
+};
+
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ struct nfnl_err *nfnl_err;
+
+ nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
+ if (nfnl_err == NULL)
+ return -ENOMEM;
+
+ nfnl_err->nlh = nlh;
+ nfnl_err->err = err;
+ nfnl_err->extack = *extack;
+ list_add_tail(&nfnl_err->head, list);
+
+ return 0;
+}
+
+static void nfnl_err_del(struct nfnl_err *nfnl_err)
+{
+ list_del(&nfnl_err->head);
+ kfree(nfnl_err);
+}
+
+static void nfnl_err_reset(struct list_head *err_list)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head)
+ nfnl_err_del(nfnl_err);
+}
+
+static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head) {
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err,
+ &nfnl_err->extack);
+ nfnl_err_del(nfnl_err);
+ }
+}
+
+enum {
+ NFNL_BATCH_FAILURE = (1 << 0),
+ NFNL_BATCH_DONE = (1 << 1),
+ NFNL_BATCH_REPLAY = (1 << 2),
+};
+
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+ u16 subsys_id, u32 genid)
+{
+ struct sk_buff *oskb = skb;
+ struct net *net = sock_net(skb->sk);
+ const struct nfnetlink_subsystem *ss;
+ const struct nfnl_callback *nc;
+ struct netlink_ext_ack extack;
+ LIST_HEAD(err_list);
+ u32 status;
+ int err;
+
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
+ return netlink_ack(skb, nlh, -EINVAL, NULL);
+replay:
+ status = 0;
+replay_abort:
+ skb = netlink_skb_clone(oskb, GFP_KERNEL);
+ if (!skb)
+ return netlink_ack(oskb, nlh, -ENOMEM, NULL);
+
+ nfnl_lock(subsys_id);
+ ss = nfnl_dereference_protected(subsys_id);
+ if (!ss) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(subsys_id);
+ request_module("nfnetlink-subsys-%d", subsys_id);
+ nfnl_lock(subsys_id);
+ ss = nfnl_dereference_protected(subsys_id);
+ if (!ss)
+#endif
+ {
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+ return kfree_skb(skb);
+ }
+ }
+
+ if (!ss->valid_genid || !ss->commit || !ss->abort) {
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+ return kfree_skb(skb);
+ }
+
+ if (!try_module_get(ss->owner)) {
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+ return kfree_skb(skb);
+ }
+
+ if (!ss->valid_genid(net, genid)) {
+ module_put(ss->owner);
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -ERESTART, NULL);
+ return kfree_skb(skb);
+ }
+
+ nfnl_unlock(subsys_id);
+
+ while (skb->len >= nlmsg_total_size(0)) {
+ int msglen, type;
+
+ if (fatal_signal_pending(current)) {
+ nfnl_err_reset(&err_list);
+ err = -EINTR;
+ status = NFNL_BATCH_FAILURE;
+ goto done;
+ }
+
+ memset(&extack, 0, sizeof(extack));
+ nlh = nlmsg_hdr(skb);
+ err = 0;
+
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
+ }
+
+ /* Only requests are handled by the kernel */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ type = nlh->nlmsg_type;
+ if (type == NFNL_MSG_BATCH_BEGIN) {
+ /* Malformed: Batch begin twice */
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
+ } else if (type == NFNL_MSG_BATCH_END) {
+ status |= NFNL_BATCH_DONE;
+ goto done;
+ } else if (type < NLMSG_MIN_TYPE) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ /* We only accept a batch with messages for the same
+ * subsystem.
+ */
+ if (NFNL_SUBSYS_ID(type) != subsys_id) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ nc = nfnetlink_find_client(type, ss);
+ if (!nc) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ if (nc->type != NFNL_CB_BATCH) {
+ err = -EINVAL;
+ goto ack;
+ }
+
+ {
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+ struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ int attrlen = nlh->nlmsg_len - min_len;
+ struct nfnl_info info = {
+ .net = net,
+ .sk = nfnlnet->nfnl,
+ .nlh = nlh,
+ .nfmsg = nlmsg_data(nlh),
+ .extack = &extack,
+ };
+
+ /* Sanity-check NFNL_MAX_ATTR_COUNT */
+ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
+ err = -ENOMEM;
+ goto ack;
+ }
+
+ err = nla_parse_deprecated(cda,
+ ss->cb[cb_id].attr_count,
+ attr, attrlen,
+ ss->cb[cb_id].policy, NULL);
+ if (err < 0)
+ goto ack;
+
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
+
+ /* The lock was released to autoload some module; we
+ * have to abort and start from scratch using the
+ * original skb.
+ */
+ if (err == -EAGAIN) {
+ status |= NFNL_BATCH_REPLAY;
+ goto done;
+ }
+ }
+ack:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* Errors are delivered once the full batch has been
+ * processed; this avoids reporting the same error
+ * several times when replaying the batch.
+ */
+ if (err == -ENOMEM ||
+ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ /* We failed to enqueue an error; reset the
+ * list of errors and send OOM to userspace,
+ * pointing at the batch header.
+ */
+ nfnl_err_reset(&err_list);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM,
+ NULL);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
+ }
+ /* We don't stop processing the batch on errors; this
+ * way, userspace gets all the errors that the batch
+ * triggers.
+ */
+ if (err)
+ status |= NFNL_BATCH_FAILURE;
+ }
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+ skb_pull(skb, msglen);
+ }
+done:
+ if (status & NFNL_BATCH_REPLAY) {
+ ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
+ nfnl_err_reset(&err_list);
+ kfree_skb(skb);
+ module_put(ss->owner);
+ goto replay;
+ } else if (status == NFNL_BATCH_DONE) {
+ err = ss->commit(net, oskb);
+ if (err == -EAGAIN) {
+ status |= NFNL_BATCH_REPLAY;
+ goto done;
+ } else if (err) {
+ ss->abort(net, oskb, NFNL_ABORT_NONE);
+ netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+ }
+ } else {
+ enum nfnl_abort_action abort_action;
+
+ if (status & NFNL_BATCH_FAILURE)
+ abort_action = NFNL_ABORT_NONE;
+ else
+ abort_action = NFNL_ABORT_VALIDATE;
+
+ err = ss->abort(net, oskb, abort_action);
+ if (err == -EAGAIN) {
+ nfnl_err_reset(&err_list);
+ kfree_skb(skb);
+ module_put(ss->owner);
+ status |= NFNL_BATCH_FAILURE;
+ goto replay_abort;
+ }
+ }
+
+ nfnl_err_deliver(&err_list, oskb);
+ kfree_skb(skb);
+ module_put(ss->owner);
+}
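+
+ /* A minimal userspace sketch of the framing this function consumes; an
+ * illustration under assumptions (plain libc plus the uapi headers), not
+ * part of this file. A batch is NFNL_MSG_BATCH_BEGIN, then the subsystem
+ * messages, then NFNL_MSG_BATCH_END, with res_id carrying the subsystem
+ * id (see nfnetlink_rcv_skb_batch() below for how it is read back).
+ *
+ * #include <string.h>
+ * #include <arpa/inet.h>
+ * #include <sys/socket.h>
+ * #include <linux/netlink.h>
+ * #include <linux/netfilter/nfnetlink.h>
+ *
+ * static size_t put_batch_hdr(char *buf, __u16 type, __u16 res_id,
+ * __u32 seq)
+ * {
+ * struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
+ * struct nfgenmsg *nfg;
+ *
+ * memset(buf, 0, NLMSG_SPACE(sizeof(*nfg)));
+ * nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*nfg));
+ * nlh->nlmsg_type = type; // NFNL_MSG_BATCH_BEGIN or _END
+ * nlh->nlmsg_flags = NLM_F_REQUEST;
+ * nlh->nlmsg_seq = seq;
+ * nfg = NLMSG_DATA(nlh);
+ * nfg->nfgen_family = AF_UNSPEC;
+ * nfg->version = NFNETLINK_V0;
+ * nfg->res_id = htons(res_id); // e.g. NFNL_SUBSYS_NFTABLES
+ * return nlh->nlmsg_len;
+ * }
+ */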
+
+static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
+ [NFNL_BATCH_GENID] = { .type = NLA_U32 },
+};
+
+static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nlattr *attr = (void *)nlh + min_len;
+ struct nlattr *cda[NFNL_BATCH_MAX + 1];
+ int attrlen = nlh->nlmsg_len - min_len;
+ struct nfgenmsg *nfgenmsg;
+ int msglen, err;
+ u32 gen_id = 0;
+ u16 res_id;
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+
+ if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+ return;
+
+ err = nla_parse_deprecated(cda, NFNL_BATCH_MAX, attr, attrlen,
+ nfnl_batch_policy, NULL);
+ if (err < 0) {
+ netlink_ack(skb, nlh, err, NULL);
+ return;
+ }
+ if (cda[NFNL_BATCH_GENID])
+ gen_id = ntohl(nla_get_be32(cda[NFNL_BATCH_GENID]));
+
+ nfgenmsg = nlmsg_data(nlh);
+ skb_pull(skb, msglen);
+ /* Work around old nft using host byte order */
+ if (nfgenmsg->res_id == (__force __be16)NFNL_SUBSYS_NFTABLES)
+ res_id = NFNL_SUBSYS_NFTABLES;
+ else
+ res_id = ntohs(nfgenmsg->res_id);
+
+ nfnetlink_rcv_batch(skb, nlh, res_id, gen_id);
+}
+
+static void nfnetlink_rcv(struct sk_buff *skb)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+
+ if (skb->len < NLMSG_HDRLEN ||
+ nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len)
+ return;
+
+ if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
+ netlink_ack(skb, nlh, -EPERM, NULL);
+ return;
+ }
+
+ if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN)
+ nfnetlink_rcv_skb_batch(skb, nlh);
+ else
+ netlink_rcv_skb(skb, nfnetlink_rcv_msg);
+}
+
+static void nfnetlink_bind_event(struct net *net, unsigned int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int type, group_bit;
+ u8 v;
+
+ /* All NFNLGRP_CONNTRACK_* group bits fit into u8.
+ * The other groups are not relevant and can be ignored.
+ */
+ if (group >= 8)
+ return;
+
+ type = nfnl_group2type[group];
+
+ switch (type) {
+ case NFNL_SUBSYS_CTNETLINK:
+ break;
+ case NFNL_SUBSYS_CTNETLINK_EXP:
+ break;
+ default:
+ return;
+ }
+
+ group_bit = (1 << group);
+
+ spin_lock(&nfnl_grp_active_lock);
+ v = READ_ONCE(nf_ctnetlink_has_listener);
+ if ((v & group_bit) == 0) {
+ v |= group_bit;
+
+ /* read concurrently without nfnl_grp_active_lock held. */
+ WRITE_ONCE(nf_ctnetlink_has_listener, v);
+ }
+
+ spin_unlock(&nfnl_grp_active_lock);
+#endif
+}
+
+static int nfnetlink_bind(struct net *net, int group)
+{
+ const struct nfnetlink_subsystem *ss;
+ int type;
+
+ if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
+ return 0;
+
+ type = nfnl_group2type[group];
+
+ rcu_read_lock();
+ ss = nfnetlink_get_subsys(type << 8);
+ rcu_read_unlock();
+ if (!ss)
+ request_module_nowait("nfnetlink-subsys-%d", type);
+
+ nfnetlink_bind_event(net, group);
+ return 0;
+}
+
+static void nfnetlink_unbind(struct net *net, int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int type, group_bit;
+
+ if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
+ return;
+
+ type = nfnl_group2type[group];
+
+ switch (type) {
+ case NFNL_SUBSYS_CTNETLINK:
+ break;
+ case NFNL_SUBSYS_CTNETLINK_EXP:
+ break;
+ default:
+ return;
+ }
+
+ /* ctnetlink_has_listener is u8 */
+ if (group >= 8)
+ return;
+
+ group_bit = (1 << group);
+
+ spin_lock(&nfnl_grp_active_lock);
+ if (!nfnetlink_has_listeners(net, group)) {
+ u8 v = READ_ONCE(nf_ctnetlink_has_listener);
+
+ v &= ~group_bit;
+
+ /* read concurrently without nfnl_grp_active_lock held. */
+ WRITE_ONCE(nf_ctnetlink_has_listener, v);
+ }
+ spin_unlock(&nfnl_grp_active_lock);
+#endif
+}
+
+static int __net_init nfnetlink_net_init(struct net *net)
+{
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+ struct netlink_kernel_cfg cfg = {
+ .groups = NFNLGRP_MAX,
+ .input = nfnetlink_rcv,
+ .bind = nfnetlink_bind,
+ .unbind = nfnetlink_unbind,
+ };
+
+ nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
+ if (!nfnlnet->nfnl)
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
+{
+ struct nfnl_net *nfnlnet;
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nfnlnet = nfnl_pernet(net);
+
+ netlink_kernel_release(nfnlnet->nfnl);
+ }
+}
+
+static struct pernet_operations nfnetlink_net_ops = {
+ .init = nfnetlink_net_init,
+ .exit_batch = nfnetlink_net_exit_batch,
+ .id = &nfnetlink_pernet_id,
+ .size = sizeof(struct nfnl_net),
+};
+
+static int __init nfnetlink_init(void)
+{
+ int i;
+
+ for (i = NFNLGRP_NONE + 1; i <= NFNLGRP_MAX; i++)
+ BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE);
+
+ for (i = 0; i < NFNL_SUBSYS_COUNT; i++)
+ __mutex_init(&table[i].mutex, nfnl_lockdep_names[i], &nfnl_lockdep_keys[i]);
+
+ return register_pernet_subsys(&nfnetlink_net_ops);
+}
+
+static void __exit nfnetlink_exit(void)
+{
+ unregister_pernet_subsys(&nfnetlink_net_ops);
+}
+module_init(nfnetlink_init);
+module_exit(nfnetlink_exit);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
new file mode 100644
index 000000000..505f46a32
--- /dev/null
+++ b/net/netfilter/nfnetlink_acct.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/atomic.h>
+#include <linux/refcount.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
+
+struct nf_acct {
+ atomic64_t pkts;
+ atomic64_t bytes;
+ unsigned long flags;
+ struct list_head head;
+ refcount_t refcnt;
+ char name[NFACCT_NAME_MAX];
+ struct rcu_head rcu_head;
+ char data[];
+};
+
+struct nfacct_filter {
+ u32 value;
+ u32 mask;
+};
+
+struct nfnl_acct_net {
+ struct list_head nfnl_acct_list;
+};
+
+static unsigned int nfnl_acct_net_id __read_mostly;
+
+static inline struct nfnl_acct_net *nfnl_acct_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_acct_net_id);
+}
+
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
+
+static int nfnl_acct_new(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
+ struct nf_acct *nfacct, *matching = NULL;
+ unsigned int size = 0;
+ char *acct_name;
+ u32 flags = 0;
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+
+ acct_name = nla_data(tb[NFACCT_NAME]);
+ if (strlen(acct_name) == 0)
+ return -EINVAL;
+
+ list_for_each_entry(nfacct, &nfnl_acct_net->nfnl_acct_list, head) {
+ if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ matching = nfacct;
+ break;
+ }
+
+ if (matching) {
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ /* reset counters if you request a replacement. */
+ atomic64_set(&matching->pkts, 0);
+ atomic64_set(&matching->bytes, 0);
+ smp_mb__before_atomic();
+ /* reset overquota flag if quota is enabled. */
+ if ((matching->flags & NFACCT_F_QUOTA))
+ clear_bit(NFACCT_OVERQUOTA_BIT,
+ &matching->flags);
+ return 0;
+ }
+ return -EBUSY;
+ }
+
+ if (tb[NFACCT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+ if (flags & ~NFACCT_F_QUOTA)
+ return -EOPNOTSUPP;
+ if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+ return -EINVAL;
+ if (flags & NFACCT_F_OVERQUOTA)
+ return -EINVAL;
+ if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA])
+ return -EINVAL;
+
+ size += sizeof(u64);
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
+ if (nfacct == NULL)
+ return -ENOMEM;
+
+ if (flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)nfacct->data;
+
+ *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+ nfacct->flags = flags;
+ }
+
+ nla_strscpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);
+
+ if (tb[NFACCT_BYTES]) {
+ atomic64_set(&nfacct->bytes,
+ be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
+ }
+ if (tb[NFACCT_PKTS]) {
+ atomic64_set(&nfacct->pkts,
+ be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
+ }
+ refcount_set(&nfacct->refcnt, 1);
+ list_add_tail_rcu(&nfacct->head, &nfnl_acct_net->nfnl_acct_list);
+ return 0;
+}
+
+static int
+nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ int event, struct nf_acct *acct)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+ u64 pkts, bytes;
+ u32 old_flags;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_string(skb, NFACCT_NAME, acct->name))
+ goto nla_put_failure;
+
+ old_flags = acct->flags;
+ if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&acct->pkts, 0);
+ bytes = atomic64_xchg(&acct->bytes, 0);
+ smp_mb__before_atomic();
+ if (acct->flags & NFACCT_F_QUOTA)
+ clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
+ } else {
+ pkts = atomic64_read(&acct->pkts);
+ bytes = atomic64_read(&acct->bytes);
+ }
+ if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts),
+ NFACCT_PAD) ||
+ nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
+ NFACCT_PAD) ||
+ nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
+ goto nla_put_failure;
+ if (acct->flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)acct->data;
+
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
+ nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota),
+ NFACCT_PAD))
+ goto nla_put_failure;
+ }
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nf_acct *cur, *last;
+ const struct nfacct_filter *filter = cb->data;
+
+ if (cb->args[2])
+ return 0;
+
+ last = (struct nf_acct *)cb->args[1];
+ if (cb->args[1])
+ cb->args[1] = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_net->nfnl_acct_list, head) {
+ if (last) {
+ if (cur != last)
+ continue;
+
+ last = NULL;
+ }
+
+ if (filter && (cur->flags & filter->mask) != filter->value)
+ continue;
+
+ if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur) < 0) {
+ cb->args[1] = (unsigned long)cur;
+ break;
+ }
+ }
+ if (!cb->args[1])
+ cb->args[2] = 1;
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int nfnl_acct_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
+ [NFACCT_FILTER_MASK] = { .type = NLA_U32 },
+ [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
+};
+
+static int nfnl_acct_start(struct netlink_callback *cb)
+{
+ const struct nlattr *const attr = cb->data;
+ struct nlattr *tb[NFACCT_FILTER_MAX + 1];
+ struct nfacct_filter *filter;
+ int err;
+
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, NFACCT_FILTER_MAX, attr,
+ filter_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
+ return -EINVAL;
+
+ filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
+ filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
+ cb->data = filter;
+
+ return 0;
+}
+
+static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
+ int ret = -ENOENT;
+ struct nf_acct *cur;
+ char *acct_name;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nfnl_acct_dump,
+ .start = nfnl_acct_start,
+ .done = nfnl_acct_done,
+ .data = (void *)tb[NFACCT_FILTER],
+ };
+
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_net->nfnl_acct_list, head) {
+ struct sk_buff *skb2;
+
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
+
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
+ }
+
+ return ret;
+}
+
+/* try to delete object, fail if it is still in use. */
+static int nfnl_acct_try_del(struct nf_acct *cur)
+{
+ int ret = 0;
+
+ /* We want to avoid races with nfnl_acct_put(), so we only decrement
+ * the refcount to zero when it is currently one.
+ */
+ if (refcount_dec_if_one(&cur->refcnt)) {
+ /* We are protected by nfnl mutex. */
+ list_del_rcu(&cur->head);
+ kfree_rcu(cur, rcu_head);
+ } else {
+ ret = -EBUSY;
+ }
+ return ret;
+}
+
+static int nfnl_acct_del(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
+ struct nf_acct *cur, *tmp;
+ int ret = -ENOENT;
+ char *acct_name;
+
+ if (!tb[NFACCT_NAME]) {
+ list_for_each_entry_safe(cur, tmp, &nfnl_acct_net->nfnl_acct_list, head)
+ nfnl_acct_try_del(cur);
+
+ return 0;
+ }
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_net->nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ ret = nfnl_acct_try_del(cur);
+ if (ret < 0)
+ return ret;
+
+ break;
+ }
+ return ret;
+}
+
+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
+ [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
+ [NFACCT_BYTES] = { .type = NLA_U64 },
+ [NFACCT_PKTS] = { .type = NLA_U64 },
+ [NFACCT_FLAGS] = { .type = NLA_U32 },
+ [NFACCT_QUOTA] = { .type = NLA_U64 },
+ [NFACCT_FILTER] = { .type = NLA_NESTED },
+};
+
+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
+ [NFNL_MSG_ACCT_NEW] = {
+ .call = nfnl_acct_new,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_GET] = {
+ .call = nfnl_acct_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_GET_CTRZERO] = {
+ .call = nfnl_acct_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_DEL] = {
+ .call = nfnl_acct_del,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
+ .name = "acct",
+ .subsys_id = NFNL_SUBSYS_ACCT,
+ .cb_count = NFNL_MSG_ACCT_MAX,
+ .cb = nfnl_acct_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
+
+struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
+{
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nf_acct *cur, *acct = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_net->nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err;
+
+ if (!refcount_inc_not_zero(&cur->refcnt)) {
+ module_put(THIS_MODULE);
+ goto err;
+ }
+
+ acct = cur;
+ break;
+ }
+err:
+ rcu_read_unlock();
+ return acct;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
+
+void nfnl_acct_put(struct nf_acct *acct)
+{
+ if (refcount_dec_and_test(&acct->refcnt))
+ kfree_rcu(acct, rcu_head);
+
+ module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_put);
+
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ atomic64_inc(&nfacct->pkts);
+ atomic64_add(skb->len, &nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_update);
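+
+ /* A minimal sketch of a packet-path consumer of this API, in the style
+ * of the xt_nfacct match; the foo_* names and the "http-traffic" object
+ * name are assumptions for illustration.
+ *
+ * static struct nf_acct *foo_acct;
+ *
+ * static int foo_setup(struct net *net)
+ * {
+ * foo_acct = nfnl_acct_find_get(net, "http-traffic"); // takes a ref
+ * return foo_acct ? 0 : -ENOENT;
+ * }
+ *
+ * static void foo_count(const struct sk_buff *skb)
+ * {
+ * nfnl_acct_update(skb, foo_acct); // bumps pkts and bytes
+ * }
+ *
+ * static void foo_teardown(void)
+ * {
+ * nfnl_acct_put(foo_acct); // drops the find_get reference
+ * }
+ */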
+
+static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
+{
+ int ret;
+ struct sk_buff *skb;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+ nfacct);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ return;
+ }
+ nfnetlink_broadcast(net, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
+}
+
+int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct)
+{
+ u64 now;
+ u64 *quota;
+ int ret = NFACCT_UNDERQUOTA;
+
+ /* nothing to check if no quota is configured */
+ if (!(nfacct->flags & NFACCT_F_QUOTA))
+ return NFACCT_NO_QUOTA;
+
+ quota = (u64 *)nfacct->data;
+ now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+ atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+ ret = now > *quota;
+
+ if (now >= *quota &&
+ !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
+ nfnl_overquota_report(net, nfacct);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
+static int __net_init nfnl_acct_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&nfnl_acct_pernet(net)->nfnl_acct_list);
+
+ return 0;
+}
+
+static void __net_exit nfnl_acct_net_exit(struct net *net)
+{
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nf_acct *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &nfnl_acct_net->nfnl_acct_list, head) {
+ list_del_rcu(&cur->head);
+
+ if (refcount_dec_and_test(&cur->refcnt))
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations nfnl_acct_ops = {
+ .init = nfnl_acct_net_init,
+ .exit = nfnl_acct_net_exit,
+ .id = &nfnl_acct_net_id,
+ .size = sizeof(struct nfnl_acct_net),
+};
+
+static int __init nfnl_acct_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nfnl_acct_ops);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: failed to register pernet ops\n");
+ goto err_out;
+ }
+
+ ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
+ goto cleanup_pernet;
+ }
+ return 0;
+
+cleanup_pernet:
+ unregister_pernet_subsys(&nfnl_acct_ops);
+err_out:
+ return ret;
+}
+
+static void __exit nfnl_acct_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfnl_acct_subsys);
+ unregister_pernet_subsys(&nfnl_acct_ops);
+}
+
+module_init(nfnl_acct_init);
+module_exit(nfnl_acct_exit);
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
new file mode 100644
index 000000000..97248963a
--- /dev/null
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This software has been sponsored by Vyatta Inc. <http://www.vyatta.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/capability.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nfnetlink_cthelper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
+
+struct nfnl_cthelper {
+ struct list_head list;
+ struct nf_conntrack_helper helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
+static int
+nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ const struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
+
+ help = nfct_help(ct);
+ if (help == NULL)
+ return NF_DROP;
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+ if (helper == NULL)
+ return NF_DROP;
+
+ /* This is a user-space helper not yet configured, skip. */
+ if ((helper->flags &
+ (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) ==
+ NF_CT_HELPER_F_USERSPACE)
+ return NF_ACCEPT;
+
+ /* If the user-space helper is not available, don't block traffic. */
+ return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS;
+}
+
+static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = {
+ [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, },
+ [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, },
+};
+
+static int
+nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nlattr *attr)
+{
+ int err;
+ struct nlattr *tb[NFCTH_TUPLE_MAX+1];
+
+ err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr,
+ nfnl_cthelper_tuple_pol, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
+ return -EINVAL;
+
+ /* Not all fields are initialized so first zero the tuple */
+ memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
+ tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
+ tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
+
+ return 0;
+}
+
+static int
+nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ const struct nf_conntrack_helper *helper;
+
+ if (attr == NULL)
+ return -EINVAL;
+
+ helper = rcu_dereference(help->helper);
+ if (!helper || helper->data_len == 0)
+ return -EINVAL;
+
+ nla_memcpy(help->data, attr, sizeof(help->data));
+ return 0;
+}
+
+static int
+nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ const struct nf_conn_help *help = nfct_help(ct);
+ const struct nf_conntrack_helper *helper;
+
+ helper = rcu_dereference(help->helper);
+ if (helper && helper->data_len &&
+ nla_put(skb, CTA_HELP_INFO, helper->data_len, &help->data))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = {
+ [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN-1 },
+ [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, },
+ [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, },
+};
+
+static int
+nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
+ const struct nlattr *attr)
+{
+ int err;
+ struct nlattr *tb[NFCTH_POLICY_MAX+1];
+
+ err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_NAME] ||
+ !tb[NFCTH_POLICY_EXPECT_MAX] ||
+ !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+ return -EINVAL;
+
+ nla_strscpy(expect_policy->name,
+ tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN);
+ expect_policy->max_expected =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
+ expect_policy->timeout =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+ return 0;
+}
+
+static const struct nla_policy
+nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = {
+ [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, },
+};
+
+static int
+nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
+ const struct nlattr *attr)
+{
+ int i, ret;
+ struct nf_conntrack_expect_policy *expect_policy;
+ struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
+ unsigned int class_max;
+
+ ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr,
+ nfnl_cthelper_expect_policy_set,
+ NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[NFCTH_POLICY_SET_NUM])
+ return -EINVAL;
+
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (class_max == 0)
+ return -EINVAL;
+ if (class_max > NF_CT_MAX_EXPECT_CLASSES)
+ return -EOVERFLOW;
+
+ expect_policy = kcalloc(class_max,
+ sizeof(struct nf_conntrack_expect_policy),
+ GFP_KERNEL);
+ if (expect_policy == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < class_max; i++) {
+ if (!tb[NFCTH_POLICY_SET+i])
+ goto err;
+
+ ret = nfnl_cthelper_expect_policy(&expect_policy[i],
+ tb[NFCTH_POLICY_SET+i]);
+ if (ret < 0)
+ goto err;
+ }
+
+ helper->expect_class_max = class_max - 1;
+ helper->expect_policy = expect_policy;
+ return 0;
+err:
+ kfree(expect_policy);
+ return -EINVAL;
+}
+
+static int
+nfnl_cthelper_create(const struct nlattr * const tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_helper *helper;
+ struct nfnl_cthelper *nfcth;
+ unsigned int size;
+ int ret;
+
+ if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
+ return -EINVAL;
+
+ nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+ if (nfcth == NULL)
+ return -ENOMEM;
+ helper = &nfcth->helper;
+
+ ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
+ if (ret < 0)
+ goto err1;
+
+ nla_strscpy(helper->name,
+ tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size > sizeof_field(struct nf_conn_help, data)) {
+ ret = -ENOMEM;
+ goto err2;
+ }
+ helper->data_len = size;
+
+ helper->flags |= NF_CT_HELPER_F_USERSPACE;
+ memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple));
+
+ helper->me = THIS_MODULE;
+ helper->help = nfnl_userspace_cthelper;
+ helper->from_nlattr = nfnl_cthelper_from_nlattr;
+ helper->to_nlattr = nfnl_cthelper_to_nlattr;
+
+ /* Default to queue number zero; this can be updated at any time. */
+ if (tb[NFCTH_QUEUE_NUM])
+ helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM]));
+
+ if (tb[NFCTH_STATUS]) {
+ int status = ntohl(nla_get_be32(tb[NFCTH_STATUS]));
+
+ switch (status) {
+ case NFCT_HELPER_STATUS_ENABLED:
+ helper->flags |= NF_CT_HELPER_F_CONFIGURED;
+ break;
+ case NFCT_HELPER_STATUS_DISABLED:
+ helper->flags &= ~NF_CT_HELPER_F_CONFIGURED;
+ break;
+ }
+ }
+
+ ret = nf_conntrack_helper_register(helper);
+ if (ret < 0)
+ goto err2;
+
+ list_add_tail(&nfcth->list, &nfnl_cthelper_list);
+ return 0;
+err2:
+ kfree(helper->expect_policy);
+err1:
+ kfree(nfcth);
+ return ret;
+}
+
+static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+ struct nf_conntrack_expect_policy *new_policy,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_NAME] ||
+ !tb[NFCTH_POLICY_EXPECT_MAX] ||
+ !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+ return -EINVAL;
+
+ if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+ return -EBUSY;
+
+ new_policy->max_expected =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
+ new_policy->timeout =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+ struct nf_conntrack_helper *helper)
+{
+ struct nf_conntrack_expect_policy *new_policy;
+ struct nf_conntrack_expect_policy *policy;
+ int i, ret = 0;
+
+ new_policy = kmalloc_array(helper->expect_class_max + 1,
+ sizeof(*new_policy), GFP_KERNEL);
+ if (!new_policy)
+ return -ENOMEM;
+
+ /* Check first that all policy attributes are well-formed, so we don't
+ * leave things in an inconsistent state on errors.
+ */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+ if (!tb[NFCTH_POLICY_SET + i]) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+ &new_policy[i],
+ tb[NFCTH_POLICY_SET + i]);
+ if (ret < 0)
+ goto err;
+ }
+ /* Now we can safely update them. */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+ policy = (struct nf_conntrack_expect_policy *)
+ &helper->expect_policy[i];
+ policy->max_expected = new_policy[i].max_expected;
+ policy->timeout = new_policy[i].timeout;
+ }
+
+err:
+ kfree(new_policy);
+ return ret;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+ unsigned int class_max;
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr,
+ nfnl_cthelper_expect_policy_set,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_SET_NUM])
+ return -EINVAL;
+
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
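+ /* The number of expectation classes is fixed at helper creation
+ * time; refuse updates that try to change it.
+ */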
+ if (helper->expect_class_max + 1 != class_max)
+ return -EBUSY;
+
+ return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
+static int
+nfnl_cthelper_update(const struct nlattr * const tb[],
+ struct nf_conntrack_helper *helper)
+{
+ u32 size;
+ int ret;
+
+ if (tb[NFCTH_PRIV_DATA_LEN]) {
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size != helper->data_len)
+ return -EBUSY;
+ }
+
+ if (tb[NFCTH_POLICY]) {
+ ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
+ if (ret < 0)
+ return ret;
+ }
+ if (tb[NFCTH_QUEUE_NUM])
+ helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM]));
+
+ if (tb[NFCTH_STATUS]) {
+ int status = ntohl(nla_get_be32(tb[NFCTH_STATUS]));
+
+ switch (status) {
+ case NFCT_HELPER_STATUS_ENABLED:
+ helper->flags |= NF_CT_HELPER_F_CONFIGURED;
+ break;
+ case NFCT_HELPER_STATUS_DISABLED:
+ helper->flags &= ~NF_CT_HELPER_F_CONFIGURED;
+ break;
+ }
+ }
+ return 0;
+}
+
+static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ const char *helper_name;
+ struct nf_conntrack_helper *cur, *helper = NULL;
+ struct nf_conntrack_tuple tuple;
+ struct nfnl_cthelper *nlcth;
+ int ret = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
+ return -EINVAL;
+
+ helper_name = nla_data(tb[NFCTH_NAME]);
+
+ ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]);
+ if (ret < 0)
+ return ret;
+
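+ /* A registered helper with the same name and tuple makes this an
+ * update; NLM_F_EXCL callers get -EEXIST instead.
+ */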
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+
+ if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
+
+ if (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum)
+ continue;
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ helper = cur;
+ break;
+ }
+
+ if (helper == NULL)
+ ret = nfnl_cthelper_create(tb, &tuple);
+ else
+ ret = nfnl_cthelper_update(tb, helper);
+
+ return ret;
+}
+
+static int
+nfnl_cthelper_dump_tuple(struct sk_buff *skb,
+ struct nf_conntrack_helper *helper)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, NFCTH_TUPLE);
+ if (nest_parms == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM,
+ htons(helper->tuple.src.l3num)))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int
+nfnl_cthelper_dump_policy(struct sk_buff *skb,
+ struct nf_conntrack_helper *helper)
+{
+ int i;
+ struct nlattr *nest_parms1, *nest_parms2;
+
+ nest_parms1 = nla_nest_start(skb, NFCTH_POLICY);
+ if (nest_parms1 == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM,
+ htonl(helper->expect_class_max + 1)))
+ goto nla_put_failure;
+
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+ nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i));
+ if (nest_parms2 == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFCTH_POLICY_NAME,
+ helper->expect_policy[i].name))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX,
+ htonl(helper->expect_policy[i].max_expected)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT,
+ htonl(helper->expect_policy[i].timeout)))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms2);
+ }
+ nla_nest_end(skb, nest_parms1);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int
+nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ int event, struct nf_conntrack_helper *helper)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+ int status;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_string(skb, NFCTH_NAME, helper->name))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num)))
+ goto nla_put_failure;
+
+ if (nfnl_cthelper_dump_tuple(skb, helper) < 0)
+ goto nla_put_failure;
+
+ if (nfnl_cthelper_dump_policy(skb, helper) < 0)
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len)))
+ goto nla_put_failure;
+
+ if (helper->flags & NF_CT_HELPER_F_CONFIGURED)
+ status = NFCT_HELPER_STATUS_ENABLED;
+ else
+ status = NFCT_HELPER_STATUS_DISABLED;
+
+ if (nla_put_be32(skb, NFCTH_STATUS, htonl(status)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_conntrack_helper *cur, *last;
+
+ rcu_read_lock();
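+ /* cb->args[0] tracks the hash bucket, cb->args[1] remembers the
+ * helper where the previous dump message ran out of room.
+ */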
+ last = (struct nf_conntrack_helper *)cb->args[1];
+ for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) {
+restart:
+ hlist_for_each_entry_rcu(cur,
+ &nf_ct_helper_hash[cb->args[0]], hnode) {
+ /* skip non-userspace conntrack helpers. */
+ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
+ continue;
+
+ if (cb->args[1]) {
+ if (cur != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (nfnl_cthelper_fill_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ NFNL_MSG_CTHELPER_NEW, cur) < 0) {
+ cb->args[1] = (unsigned long)cur;
+ goto out;
+ }
+ }
+ }
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+out:
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ int ret = -ENOENT;
+ struct nf_conntrack_helper *cur;
+ struct sk_buff *skb2;
+ char *helper_name = NULL;
+ struct nf_conntrack_tuple tuple;
+ struct nfnl_cthelper *nlcth;
+ bool tuple_set = false;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nfnl_cthelper_dump_table,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ if (tb[NFCTH_NAME])
+ helper_name = nla_data(tb[NFCTH_NAME]);
+
+ if (tb[NFCTH_TUPLE]) {
+ ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]);
+ if (ret < 0)
+ return ret;
+
+ tuple_set = true;
+ }
+
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
+
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ NFNL_MSG_CTHELPER_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
+
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
+ }
+
+ return ret;
+}
+
+static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ char *helper_name = NULL;
+ struct nf_conntrack_helper *cur;
+ struct nf_conntrack_tuple tuple;
+ bool tuple_set = false, found = false;
+ struct nfnl_cthelper *nlcth, *n;
+ int j = 0, ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (tb[NFCTH_NAME])
+ helper_name = nla_data(tb[NFCTH_NAME]);
+
+ if (tb[NFCTH_TUPLE]) {
+ ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]);
+ if (ret < 0)
+ return ret;
+
+ tuple_set = true;
+ }
+
+ ret = -ENOENT;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ j++;
+
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
+
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
+
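+ /* Only remove the helper if nothing else holds a reference. */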
+ if (refcount_dec_if_one(&cur->refcnt)) {
+ found = true;
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+
+ list_del(&nlcth->list);
+ kfree(nlcth);
+ } else {
+ ret = -EBUSY;
+ }
+ }
+
+ /* Make sure we return success if we flush and there are no helpers */
+ return (found || j == 0) ? 0 : ret;
+}
+
+static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
+ [NFCTH_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN-1 },
+ [NFCTH_QUEUE_NUM] = { .type = NLA_U32, },
+ [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, },
+ [NFCTH_STATUS] = { .type = NLA_U32, },
+};
+
+static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
+ [NFNL_MSG_CTHELPER_NEW] = {
+ .call = nfnl_cthelper_new,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
+ [NFNL_MSG_CTHELPER_GET] = {
+ .call = nfnl_cthelper_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
+ [NFNL_MSG_CTHELPER_DEL] = {
+ .call = nfnl_cthelper_del,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfnl_cthelper_subsys = {
+ .name = "cthelper",
+ .subsys_id = NFNL_SUBSYS_CTHELPER,
+ .cb_count = NFNL_MSG_CTHELPER_MAX,
+ .cb = nfnl_cthelper_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER);
+
+static int __init nfnl_cthelper_init(void)
+{
+ int ret;
+
+ ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys);
+ if (ret < 0)
+ pr_err("nfnl_cthelper: cannot register with nfnetlink.\n");
+ return ret;
+}
+
+static void __exit nfnl_cthelper_exit(void)
+{
+ struct nf_conntrack_helper *cur;
+ struct nfnl_cthelper *nlcth, *n;
+
+ nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
+
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+ kfree(nlcth);
+ }
+}
+
+module_init(nfnl_cthelper_init);
+module_exit(nfnl_cthelper_exit);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
new file mode 100644
index 000000000..f466af4f8
--- /dev/null
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rculist.h>
+#include <linux/rculist_nulls.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/security.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <net/netlink.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static unsigned int nfct_timeout_id __read_mostly;
+
+struct ctnl_timeout {
+ struct list_head head;
+ struct list_head free_head;
+ struct rcu_head rcu_head;
+ refcount_t refcnt;
+ char name[CTNL_TIMEOUT_NAME_MAX];
+
+ /* must be at the end */
+ struct nf_ct_timeout timeout;
+};
+
+struct nfct_timeout_pernet {
+ struct list_head nfct_timeout_list;
+ struct list_head nfct_timeout_freelist;
+};
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
+
+static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
+ [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
+ .len = CTNL_TIMEOUT_NAME_MAX - 1},
+ [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 },
+ [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 },
+ [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED },
+};
+
+static struct nfct_timeout_pernet *nfct_timeout_pernet(struct net *net)
+{
+ return net_generic(net, nfct_timeout_id);
+}
+
+static int
+ctnl_timeout_parse_policy(void *timeout,
+ const struct nf_conntrack_l4proto *l4proto,
+ struct net *net, const struct nlattr *attr)
+{
+ struct nlattr **tb;
+ int ret = 0;
+
+ tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+ GFP_KERNEL);
+
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nla_parse_nested_deprecated(tb,
+ l4proto->ctnl_timeout.nlattr_max,
+ attr,
+ l4proto->ctnl_timeout.nla_policy,
+ NULL);
+ if (ret < 0)
+ goto err;
+
+ ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
+
+err:
+ kfree(tb);
+ return ret;
+}
+
+static int cttimeout_new_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
+ __u16 l3num;
+ __u8 l4num;
+ const struct nf_conntrack_l4proto *l4proto;
+ struct ctnl_timeout *timeout, *matching = NULL;
+ char *name;
+ int ret;
+
+ if (!cda[CTA_TIMEOUT_NAME] ||
+ !cda[CTA_TIMEOUT_L3PROTO] ||
+ !cda[CTA_TIMEOUT_L4PROTO] ||
+ !cda[CTA_TIMEOUT_DATA])
+ return -EINVAL;
+
+ name = nla_data(cda[CTA_TIMEOUT_NAME]);
+ l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+ l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+
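+ /* Timeout policies are keyed by name; an existing entry is either
+ * replaced (NLM_F_REPLACE) or the request fails.
+ */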
+ list_for_each_entry(timeout, &pernet->nfct_timeout_list, head) {
+ if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+ continue;
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ matching = timeout;
+ break;
+ }
+
+ if (matching) {
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ /* You cannot replace one timeout policy by another of a
+ * different kind, sorry.
+ */
+ if (matching->timeout.l3num != l3num ||
+ matching->timeout.l4proto->l4proto != l4num)
+ return -EINVAL;
+
+ return ctnl_timeout_parse_policy(&matching->timeout.data,
+ matching->timeout.l4proto,
+ info->net,
+ cda[CTA_TIMEOUT_DATA]);
+ }
+
+ return -EBUSY;
+ }
+
+ l4proto = nf_ct_l4proto_find(l4num);
+
+ /* This protocol is not supported, skip. */
+ if (l4proto->l4proto != l4num) {
+ ret = -EOPNOTSUPP;
+ goto err_proto_put;
+ }
+
+ timeout = kzalloc(sizeof(struct ctnl_timeout) +
+ l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
+ if (timeout == NULL) {
+ ret = -ENOMEM;
+ goto err_proto_put;
+ }
+
+ ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto,
+ info->net, cda[CTA_TIMEOUT_DATA]);
+ if (ret < 0)
+ goto err;
+
+ strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
+ timeout->timeout.l3num = l3num;
+ timeout->timeout.l4proto = l4proto;
+ refcount_set(&timeout->refcnt, 1);
+ __module_get(THIS_MODULE);
+ list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
+
+ return 0;
+err:
+ kfree(timeout);
+err_proto_put:
+ return ret;
+}
+
+static int
+ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ int event, struct ctnl_timeout *timeout)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+ const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
+ struct nlattr *nest_parms;
+ int ret;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
+ nla_put_be16(skb, CTA_TIMEOUT_L3PROTO,
+ htons(timeout->timeout.l3num)) ||
+ nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) ||
+ nla_put_be32(skb, CTA_TIMEOUT_USE,
+ htonl(refcount_read(&timeout->refcnt))))
+ goto nla_put_failure;
+
+ nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nfct_timeout_pernet *pernet;
+ struct net *net = sock_net(skb->sk);
+ struct ctnl_timeout *cur, *last;
+
+ if (cb->args[2])
+ return 0;
+
+ last = (struct ctnl_timeout *)cb->args[1];
+ if (cb->args[1])
+ cb->args[1] = 0;
+
+ rcu_read_lock();
+ pernet = nfct_timeout_pernet(net);
+ list_for_each_entry_rcu(cur, &pernet->nfct_timeout_list, head) {
+ if (last) {
+ if (cur != last)
+ continue;
+
+ last = NULL;
+ }
+ if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
+ cb->args[1] = (unsigned long)cur;
+ break;
+ }
+ }
+ if (!cb->args[1])
+ cb->args[2] = 1;
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int cttimeout_get_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
+ int ret = -ENOENT;
+ char *name;
+ struct ctnl_timeout *cur;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnl_timeout_dump,
+ };
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!cda[CTA_TIMEOUT_NAME])
+ return -EINVAL;
+ name = nla_data(cda[CTA_TIMEOUT_NAME]);
+
+ list_for_each_entry(cur, &pernet->nfct_timeout_list, head) {
+ struct sk_buff *skb2;
+
+ if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+ continue;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ IPCTNL_MSG_TIMEOUT_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
+
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
+ }
+
+ return ret;
+}
+
+/* try to delete the object; fail if it is still in use. */
+static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
+{
+ int ret = 0;
+
+ /* We want to avoid races with ctnl_timeout_put(), so we only
+ * decrement the refcount when it is exactly one.
+ */
+ if (refcount_dec_if_one(&timeout->refcnt)) {
+ /* We are protected by nfnl mutex. */
+ list_del_rcu(&timeout->head);
+ nf_ct_untimeout(net, &timeout->timeout);
+ kfree_rcu(timeout, rcu_head);
+ } else {
+ ret = -EBUSY;
+ }
+ return ret;
+}
+
+static int cttimeout_del_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
+ struct ctnl_timeout *cur, *tmp;
+ int ret = -ENOENT;
+ char *name;
+
+ if (!cda[CTA_TIMEOUT_NAME]) {
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list,
+ head)
+ ctnl_timeout_try_del(info->net, cur);
+
+ return 0;
+ }
+ name = nla_data(cda[CTA_TIMEOUT_NAME]);
+
+ list_for_each_entry(cur, &pernet->nfct_timeout_list, head) {
+ if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+ continue;
+
+ ret = ctnl_timeout_try_del(info->net, cur);
+ if (ret < 0)
+ return ret;
+
+ break;
+ }
+ return ret;
+}
+
+static int cttimeout_default_set(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ __u8 l4num;
+ int ret;
+
+ if (!cda[CTA_TIMEOUT_L3PROTO] ||
+ !cda[CTA_TIMEOUT_L4PROTO] ||
+ !cda[CTA_TIMEOUT_DATA])
+ return -EINVAL;
+
+ l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+ l4proto = nf_ct_l4proto_find(l4num);
+
+ /* This protocol is not supported, skip. */
+ if (l4proto->l4proto != l4num) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
+ ret = ctnl_timeout_parse_policy(NULL, l4proto, info->net,
+ cda[CTA_TIMEOUT_DATA]);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ return ret;
+}
+
+static int
+cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
+ u32 seq, u32 type, int event, u16 l3num,
+ const struct nf_conntrack_l4proto *l4proto,
+ const unsigned int *timeouts)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+ struct nlattr *nest_parms;
+ int ret;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
+ nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
+ goto nla_put_failure;
+
+ nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int cttimeout_default_get(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts = NULL;
+ struct sk_buff *skb2;
+ __u16 l3num;
+ __u8 l4num;
+ int ret;
+
+ if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
+ return -EINVAL;
+
+ l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+ l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+ l4proto = nf_ct_l4proto_find(l4num);
+
+ if (l4proto->l4proto != l4num)
+ return -EOPNOTSUPP;
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ timeouts = &nf_icmp_pernet(info->net)->timeout;
+ break;
+ case IPPROTO_TCP:
+ timeouts = nf_tcp_pernet(info->net)->timeouts;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ timeouts = nf_udp_pernet(info->net)->timeouts;
+ break;
+ case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ timeouts = nf_dccp_pernet(info->net)->dccp_timeout;
+#endif
+ break;
+ case IPPROTO_ICMPV6:
+ timeouts = &nf_icmpv6_pernet(info->net)->timeout;
+ break;
+ case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ timeouts = nf_sctp_pernet(info->net)->timeouts;
+#endif
+ break;
+ case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ timeouts = nf_gre_pernet(info->net)->timeouts;
+#endif
+ break;
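+ /* 255 is the pseudo-protocol handled by the generic tracker. */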
+ case 255:
+ timeouts = &nf_generic_pernet(info->net)->timeout;
+ break;
+ default:
+ WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
+ break;
+ }
+
+ if (!timeouts)
+ return -EOPNOTSUPP;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ ret = cttimeout_default_fill_info(info->net, skb2,
+ NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
+ l3num, l4proto, timeouts);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
+
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+}
+
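+/* Hook for the conntrack core: look up a timeout policy by name under
+ * rcu_read_lock() and take a reference only if it is still live.
+ */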
+static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
+ const char *name)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct ctnl_timeout *timeout, *matching = NULL;
+
+ list_for_each_entry_rcu(timeout, &pernet->nfct_timeout_list, head) {
+ if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+ continue;
+
+ if (!refcount_inc_not_zero(&timeout->refcnt))
+ goto err;
+ matching = timeout;
+ break;
+ }
+err:
+ return matching ? &matching->timeout : NULL;
+}
+
+static void ctnl_timeout_put(struct nf_ct_timeout *t)
+{
+ struct ctnl_timeout *timeout =
+ container_of(t, struct ctnl_timeout, timeout);
+
+ if (refcount_dec_and_test(&timeout->refcnt)) {
+ kfree_rcu(timeout, rcu_head);
+ module_put(THIS_MODULE);
+ }
+}
+
+static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
+ [IPCTNL_MSG_TIMEOUT_NEW] = {
+ .call = cttimeout_new_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_GET] = {
+ .call = cttimeout_get_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DELETE] = {
+ .call = cttimeout_del_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = {
+ .call = cttimeout_default_set,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = {
+ .call = cttimeout_default_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+};
+
+static const struct nfnetlink_subsystem cttimeout_subsys = {
+ .name = "conntrack_timeout",
+ .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT,
+ .cb_count = IPCTNL_MSG_TIMEOUT_MAX,
+ .cb = cttimeout_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
+
+static int __net_init cttimeout_net_init(struct net *net)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+
+ INIT_LIST_HEAD(&pernet->nfct_timeout_list);
+ INIT_LIST_HEAD(&pernet->nfct_timeout_freelist);
+
+ return 0;
+}
+
+static void __net_exit cttimeout_net_pre_exit(struct net *net)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct ctnl_timeout *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ list_add(&cur->free_head, &pernet->nfct_timeout_freelist);
+ }
+
+ /* core calls synchronize_rcu() after this */
+}
+
+static void __net_exit cttimeout_net_exit(struct net *net)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct ctnl_timeout *cur, *tmp;
+
+ if (list_empty(&pernet->nfct_timeout_freelist))
+ return;
+
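+ /* pre_exit moved every object to the free list; detach remaining
+ * conntrack references before the final release.
+ */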
+ nf_ct_untimeout(net, NULL);
+
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) {
+ list_del(&cur->free_head);
+
+ if (refcount_dec_and_test(&cur->refcnt))
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations cttimeout_ops = {
+ .init = cttimeout_net_init,
+ .pre_exit = cttimeout_net_pre_exit,
+ .exit = cttimeout_net_exit,
+ .id = &nfct_timeout_id,
+ .size = sizeof(struct nfct_timeout_pernet),
+};
+
+static const struct nf_ct_timeout_hooks hooks = {
+ .timeout_find_get = ctnl_timeout_find_get,
+ .timeout_put = ctnl_timeout_put,
+};
+
+static int __init cttimeout_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&cttimeout_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nfnetlink_subsys_register(&cttimeout_subsys);
+ if (ret < 0) {
+ pr_err("cttimeout_init: cannot register cttimeout with "
+ "nfnetlink.\n");
+ goto err_out;
+ }
+ RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks);
+ return 0;
+
+err_out:
+ unregister_pernet_subsys(&cttimeout_ops);
+ return ret;
+}
+
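+/* nf_ct_iterate_destroy() callback: clear the timeout pointer of every
+ * conntrack so nothing keeps referencing this module after unload.
+ */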
+static int untimeout(struct nf_conn *ct, void *timeout)
+{
+ struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+ if (timeout_ext)
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+ return 0;
+}
+
+static void __exit cttimeout_exit(void)
+{
+ nfnetlink_subsys_unregister(&cttimeout_subsys);
+
+ unregister_pernet_subsys(&cttimeout_ops);
+ RCU_INIT_POINTER(nf_ct_timeout_hook, NULL);
+
+ nf_ct_iterate_destroy(untimeout, NULL);
+}
+
+module_init(cttimeout_init);
+module_exit(cttimeout_exit);
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
new file mode 100644
index 000000000..8120aadf6
--- /dev/null
+++ b/net/netfilter/nfnetlink_hook.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021 Red Hat GmbH
+ *
+ * Author: Florian Westphal <fw@strlen.de>
+ */
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_hook.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = {
+ [NFNLA_HOOK_HOOKNUM] = { .type = NLA_U32 },
+ [NFNLA_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFNLA_HOOK_DEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
+ [NFNLA_HOOK_FUNCTION_NAME] = { .type = NLA_NUL_STRING,
+ .len = KSYM_NAME_LEN, },
+ [NFNLA_HOOK_MODULE_NAME] = { .type = NLA_NUL_STRING,
+ .len = MODULE_NAME_LEN, },
+ [NFNLA_HOOK_CHAIN_INFO] = { .type = NLA_NESTED, },
+};
+
+static int nf_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *c)
+{
+ int err;
+
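+ /* netlink_dump_start() can sleep, so leave the RCU read side for
+ * the call; the module reference keeps the callbacks alive.
+ */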
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rcu_read_unlock();
+ err = netlink_dump_start(nlsk, skb, nlh, c);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ return err;
+}
+
+struct nfnl_dump_hook_data {
+ char devname[IFNAMSIZ];
+ unsigned long headv;
+ u8 hook;
+};
+
+static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ unsigned int seq,
+ const struct nf_hook_ops *ops)
+{
+ struct net *net = sock_net(nlskb->sk);
+ struct nlattr *nest, *nest2;
+ struct nft_chain *chain;
+ int ret = 0;
+
+ if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES)
+ return 0;
+
+ chain = ops->priv;
+ if (WARN_ON_ONCE(!chain))
+ return 0;
+
+ if (!nft_is_active(net, chain))
+ return 0;
+
+ nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE,
+ htonl(NFNL_HOOK_TYPE_NFTABLES));
+ if (ret)
+ goto cancel_nest;
+
+ nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
+ if (!nest2)
+ goto cancel_nest;
+
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
+ if (ret)
+ goto cancel_nest;
+
+ nla_nest_end(nlskb, nest2);
+ nla_nest_end(nlskb, nest);
+ return ret;
+
+cancel_nest:
+ nla_nest_cancel(nlskb, nest);
+ return -EMSGSIZE;
+}
+
+static int nfnl_hook_dump_one(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ const struct nf_hook_ops *ops,
+ int family, unsigned int seq)
+{
+ u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
+ unsigned int portid = NETLINK_CB(nlskb).portid;
+ struct nlmsghdr *nlh;
+ int ret = -EMSGSIZE;
+ u32 hooknum;
+#ifdef CONFIG_KALLSYMS
+ char sym[KSYM_SYMBOL_LEN];
+ char *module_name;
+#endif
+ nlh = nfnl_msg_put(nlskb, portid, seq, event,
+ NLM_F_MULTI, family, NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+#ifdef CONFIG_KALLSYMS
+ ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
+ if (ret >= sizeof(sym)) {
+ ret = -EINVAL;
+ goto nla_put_failure;
+ }
+
+ module_name = strstr(sym, " [");
+ if (module_name) {
+ char *end;
+
+ *module_name = '\0';
+ module_name += 2;
+ end = strchr(module_name, ']');
+ if (end) {
+ *end = 0;
+
+ ret = nla_put_string(nlskb, NFNLA_HOOK_MODULE_NAME, module_name);
+ if (ret)
+ goto nla_put_failure;
+ }
+ }
+
+ ret = nla_put_string(nlskb, NFNLA_HOOK_FUNCTION_NAME, sym);
+ if (ret)
+ goto nla_put_failure;
+#endif
+
+ if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
+ hooknum = NF_NETDEV_INGRESS;
+ else
+ hooknum = ops->hooknum;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
+ if (ret)
+ goto nla_put_failure;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_PRIORITY, htonl(ops->priority));
+ if (ret)
+ goto nla_put_failure;
+
+ ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops);
+ if (ret)
+ goto nla_put_failure;
+
+ nlmsg_end(nlskb, nlh);
+ return 0;
+nla_put_failure:
+ nlmsg_trim(nlskb, nlh);
+ return ret;
+}
+
+static const struct nf_hook_entries *
+nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
+{
+ const struct nf_hook_entries *hook_head = NULL;
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
+ struct net_device *netdev;
+#endif
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ if (hook >= ARRAY_SIZE(net->nf.hooks_ipv4))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
+ break;
+ case NFPROTO_IPV6:
+ if (hook >= ARRAY_SIZE(net->nf.hooks_ipv6))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
+ break;
+ case NFPROTO_ARP:
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ if (hook >= ARRAY_SIZE(net->nf.hooks_arp))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
+#endif
+ break;
+ case NFPROTO_BRIDGE:
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ if (hook >= ARRAY_SIZE(net->nf.hooks_bridge))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+#endif
+ break;
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
+ case NFPROTO_NETDEV:
+ if (hook >= NF_NETDEV_NUMHOOKS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ netdev = dev_get_by_name_rcu(net, dev);
+ if (!netdev)
+ return ERR_PTR(-ENODEV);
+
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (hook == NF_NETDEV_INGRESS)
+ return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hook == NF_NETDEV_EGRESS)
+ return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+ fallthrough;
+#endif
+ default:
+ return ERR_PTR(-EPROTONOSUPPORT);
+ }
+
+ return hook_head;
+}
+
+static int nfnl_hook_dump(struct sk_buff *nlskb,
+ struct netlink_callback *cb)
+{
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nfnl_dump_hook_data *ctx = cb->data;
+ int err, family = nfmsg->nfgen_family;
+ struct net *net = sock_net(nlskb->sk);
+ struct nf_hook_ops * const *ops;
+ const struct nf_hook_entries *e;
+ unsigned int i = cb->args[0];
+
+ rcu_read_lock();
+
+ e = nfnl_hook_entries_head(family, ctx->hook, net, ctx->devname);
+ if (!e)
+ goto done;
+
+ if (IS_ERR(e)) {
+ cb->seq++;
+ goto done;
+ }
+
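+ /* The hook list changed since the dump started; bump cb->seq so
+ * nl_dump_check_consistent() marks the dump as interrupted.
+ */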
+ if ((unsigned long)e != ctx->headv || i >= e->num_hook_entries)
+ cb->seq++;
+
+ ops = nf_hook_entries_get_hook_ops(e);
+
+ for (; i < e->num_hook_entries; i++) {
+ err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
+ cb->nlh->nlmsg_seq);
+ if (err)
+ break;
+ }
+
+done:
+ nl_dump_check_consistent(cb, nlmsg_hdr(nlskb));
+ rcu_read_unlock();
+ cb->args[0] = i;
+ return nlskb->len;
+}
+
+static int nfnl_hook_dump_start(struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nlattr * const *nla = cb->data;
+ struct nfnl_dump_hook_data *ctx = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ u8 family = nfmsg->nfgen_family;
+ char name[IFNAMSIZ] = "";
+ const void *head;
+ u32 hooknum;
+
+ hooknum = ntohl(nla_get_be32(nla[NFNLA_HOOK_HOOKNUM]));
+ if (hooknum > 255)
+ return -EINVAL;
+
+ if (family == NFPROTO_NETDEV) {
+ if (!nla[NFNLA_HOOK_DEV])
+ return -EINVAL;
+
+ nla_strscpy(name, nla[NFNLA_HOOK_DEV], sizeof(name));
+ }
+
+ rcu_read_lock();
+ /* Not dereferenced; for consistency check only */
+ head = nfnl_hook_entries_head(family, hooknum, net, name);
+ rcu_read_unlock();
+
+ if (head && IS_ERR(head))
+ return PTR_ERR(head);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ strscpy(ctx->devname, name, sizeof(ctx->devname));
+ ctx->headv = (unsigned long)head;
+ ctx->hook = hooknum;
+
+ cb->seq = 1;
+ cb->data = ctx;
+
+ return 0;
+}
+
+static int nfnl_hook_dump_stop(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static int nfnl_hook_get(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ if (!nla[NFNLA_HOOK_HOOKNUM])
+ return -EINVAL;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nfnl_hook_dump_start,
+ .done = nfnl_hook_dump_stop,
+ .dump = nfnl_hook_dump,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nf_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct nfnl_callback nfnl_hook_cb[NFNL_MSG_HOOK_MAX] = {
+ [NFNL_MSG_HOOK_GET] = {
+ .call = nfnl_hook_get,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFNLA_HOOK_MAX,
+ .policy = nfnl_hook_nla_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfhook_subsys = {
+ .name = "nfhook",
+ .subsys_id = NFNL_SUBSYS_HOOK,
+ .cb_count = NFNL_MSG_HOOK_MAX,
+ .cb = nfnl_hook_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_HOOK);
+
+static int __init nfnetlink_hook_init(void)
+{
+ return nfnetlink_subsys_register(&nfhook_subsys);
+}
+
+static void __exit nfnetlink_hook_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfhook_subsys);
+}
+
+module_init(nfnetlink_hook_init);
+module_exit(nfnetlink_hook_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("nfnetlink_hook: list registered netfilter hooks");
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
new file mode 100644
index 000000000..200a82a8f
--- /dev/null
+++ b/net/netfilter/nfnetlink_log.c
@@ -0,0 +1,1209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is a module which is used for logging packets to userspace via
+ * nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the old ipv4-only ipt_ULOG.c:
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_log.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
+
+#include <linux/atomic.h>
+#include <linux/refcount.h>
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+#include "../bridge/br_private.h"
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+#define NFULNL_COPY_DISABLED 0xff
+#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
+#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
+#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
+/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN)
+
+#define PRINTR(x, args...) do { if (net_ratelimit()) \
+ printk(x, ## args); } while (0)
+
+struct nfulnl_instance {
+ struct hlist_node hlist; /* global list of instances */
+ spinlock_t lock;
+ refcount_t use; /* use count */
+
+ unsigned int qlen; /* number of nlmsgs in skb */
+ struct sk_buff *skb; /* pre-allocated skb */
+ struct timer_list timer;
+ struct net *net;
+ netns_tracker ns_tracker;
+ struct user_namespace *peer_user_ns; /* User namespace of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
+
+ /* configurable parameters */
+ unsigned int flushtimeout; /* timeout until queue flush */
+ unsigned int nlbufsiz; /* netlink buffer allocation size */
+ unsigned int qthreshold; /* threshold of the queue */
+ u_int32_t copy_range;
+ u_int32_t seq; /* instance-local sequential counter */
+ u_int16_t group_num; /* number of this queue */
+ u_int16_t flags;
+ u_int8_t copy_mode;
+ struct rcu_head rcu;
+};
+
+#define INSTANCE_BUCKETS 16
+
+static unsigned int nfnl_log_net_id __read_mostly;
+
+struct nfnl_log_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+ atomic_t global_seq;
+};
+
+static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_log_net_id);
+}
+
+static inline u_int8_t instance_hashfn(u_int16_t group_num)
+{
+ return ((group_num & 0xff) % INSTANCE_BUCKETS);
+}
+
+static struct nfulnl_instance *
+__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
+{
+ struct hlist_head *head;
+ struct nfulnl_instance *inst;
+
+ head = &log->instance_table[instance_hashfn(group_num)];
+ hlist_for_each_entry_rcu(inst, head, hlist) {
+ if (inst->group_num == group_num)
+ return inst;
+ }
+ return NULL;
+}
+
+static inline void
+instance_get(struct nfulnl_instance *inst)
+{
+ refcount_inc(&inst->use);
+}
+
+static struct nfulnl_instance *
+instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
+{
+ struct nfulnl_instance *inst;
+
+ rcu_read_lock_bh();
+ inst = __instance_lookup(log, group_num);
+ if (inst && !refcount_inc_not_zero(&inst->use))
+ inst = NULL;
+ rcu_read_unlock_bh();
+
+ return inst;
+}
+
+static void nfulnl_instance_free_rcu(struct rcu_head *head)
+{
+ struct nfulnl_instance *inst =
+ container_of(head, struct nfulnl_instance, rcu);
+
+ put_net_track(inst->net, &inst->ns_tracker);
+ kfree(inst);
+ module_put(THIS_MODULE);
+}
+
+static void
+instance_put(struct nfulnl_instance *inst)
+{
+ if (inst && refcount_dec_and_test(&inst->use))
+ call_rcu(&inst->rcu, nfulnl_instance_free_rcu);
+}
+
+static void nfulnl_timer(struct timer_list *t);
+
+static struct nfulnl_instance *
+instance_create(struct net *net, u_int16_t group_num,
+ u32 portid, struct user_namespace *user_ns)
+{
+ struct nfulnl_instance *inst;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ int err;
+
+ spin_lock_bh(&log->instances_lock);
+ if (__instance_lookup(log, group_num)) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
+ if (!inst) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if (!try_module_get(THIS_MODULE)) {
+ kfree(inst);
+ err = -EAGAIN;
+ goto out_unlock;
+ }
+
+ INIT_HLIST_NODE(&inst->hlist);
+ spin_lock_init(&inst->lock);
+ /* needs to be two, since we _put() after creation */
+ refcount_set(&inst->use, 2);
+
+ timer_setup(&inst->timer, nfulnl_timer, 0);
+
+ inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC);
+ inst->peer_user_ns = user_ns;
+ inst->peer_portid = portid;
+ inst->group_num = group_num;
+
+ inst->qthreshold = NFULNL_QTHRESH_DEFAULT;
+ inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT;
+ inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT;
+ inst->copy_mode = NFULNL_COPY_PACKET;
+ inst->copy_range = NFULNL_COPY_RANGE_MAX;
+
+ hlist_add_head_rcu(&inst->hlist,
+ &log->instance_table[instance_hashfn(group_num)]);
+
+ spin_unlock_bh(&log->instances_lock);
+
+ return inst;
+
+out_unlock:
+ spin_unlock_bh(&log->instances_lock);
+ return ERR_PTR(err);
+}
+
+static void __nfulnl_flush(struct nfulnl_instance *inst);
+
+/* called with BH disabled */
+static void
+__instance_destroy(struct nfulnl_instance *inst)
+{
+ /* first pull it out of the global list */
+ hlist_del_rcu(&inst->hlist);
+
+ /* then flush all pending packets from skb */
+
+ spin_lock(&inst->lock);
+
+ /* lockless readers won't be able to use us */
+ inst->copy_mode = NFULNL_COPY_DISABLED;
+
+ if (inst->skb)
+ __nfulnl_flush(inst);
+ spin_unlock(&inst->lock);
+
+ /* and finally put the refcount */
+ instance_put(inst);
+}
+
+static inline void
+instance_destroy(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst)
+{
+ spin_lock_bh(&log->instances_lock);
+ __instance_destroy(inst);
+ spin_unlock_bh(&log->instances_lock);
+}
+
+static int
+nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
+ unsigned int range)
+{
+ int status = 0;
+
+ spin_lock_bh(&inst->lock);
+
+ switch (mode) {
+ case NFULNL_COPY_NONE:
+ case NFULNL_COPY_META:
+ inst->copy_mode = mode;
+ inst->copy_range = 0;
+ break;
+
+ case NFULNL_COPY_PACKET:
+ inst->copy_mode = mode;
+ if (range == 0)
+ range = NFULNL_COPY_RANGE_MAX;
+ inst->copy_range = min_t(unsigned int,
+ range, NFULNL_COPY_RANGE_MAX);
+ break;
+
+ default:
+ status = -EINVAL;
+ break;
+ }
+
+ spin_unlock_bh(&inst->lock);
+
+ return status;
+}
+
+static int
+nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
+{
+ int status;
+
+ spin_lock_bh(&inst->lock);
+ if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT)
+ status = -ERANGE;
+ else if (nlbufsiz > 131072)
+ status = -ERANGE;
+ else {
+ inst->nlbufsiz = nlbufsiz;
+ status = 0;
+ }
+ spin_unlock_bh(&inst->lock);
+
+ return status;
+}
+
+static void
+nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
+{
+ spin_lock_bh(&inst->lock);
+ inst->flushtimeout = timeout;
+ spin_unlock_bh(&inst->lock);
+}
+
+static void
+nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
+{
+ spin_lock_bh(&inst->lock);
+ inst->qthreshold = qthresh;
+ spin_unlock_bh(&inst->lock);
+}
+
+static int
+nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
+{
+ spin_lock_bh(&inst->lock);
+ inst->flags = flags;
+ spin_unlock_bh(&inst->lock);
+
+ return 0;
+}
+
+static struct sk_buff *
+nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
+ unsigned int pkt_size)
+{
+ struct sk_buff *skb;
+ unsigned int n;
+
+ /* alloc skb which should be big enough for a whole multipart
+ * message. WARNING: has to be <= 128k due to slab restrictions */
+
+ n = max(inst_size, pkt_size);
+ skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
+ if (!skb) {
+ if (n > pkt_size) {
+ /* try to allocate only as much as we need for the
+ * current packet */
+
+ skb = alloc_skb(pkt_size, GFP_ATOMIC);
+ }
+ }
+
+ return skb;
+}
+
+static void
+__nfulnl_send(struct nfulnl_instance *inst)
+{
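+ /* More than one queued message forms a multipart stream, which
+ * must be terminated with NLMSG_DONE before it is sent.
+ */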
+ if (inst->qlen > 1) {
+ struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
+ NLMSG_DONE,
+ sizeof(struct nfgenmsg),
+ 0);
+ if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
+ inst->skb->len, skb_tailroom(inst->skb))) {
+ kfree_skb(inst->skb);
+ goto out;
+ }
+ }
+ nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid);
+out:
+ inst->qlen = 0;
+ inst->skb = NULL;
+}
+
+static void
+__nfulnl_flush(struct nfulnl_instance *inst)
+{
+ /* timer holds a reference */
+ if (del_timer(&inst->timer))
+ instance_put(inst);
+ if (inst->skb)
+ __nfulnl_send(inst);
+}
+
+static void
+nfulnl_timer(struct timer_list *t)
+{
+ struct nfulnl_instance *inst = from_timer(inst, t, timer);
+
+ spin_lock_bh(&inst->lock);
+ if (inst->skb)
+ __nfulnl_send(inst);
+ spin_unlock_bh(&inst->lock);
+ instance_put(inst);
+}
+
+static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
+{
+ u32 size = 0;
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ size += nla_total_size(0); /* nested */
+ size += nla_total_size(sizeof(u16)); /* id */
+ size += nla_total_size(sizeof(u16)); /* tag */
+ }
+
+ if (skb->network_header > skb->mac_header)
+ size += nla_total_size(skb->network_header - skb->mac_header);
+
+ return size;
+}
+
+static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
+{
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(inst->skb, NFULA_VLAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) ||
+ nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto))
+ goto nla_put_failure;
+
+ nla_nest_end(inst->skb, nest);
+ }
+
+ if (skb->mac_header < skb->network_header) {
+ int len = (int)(skb->network_header - skb->mac_header);
+
+ if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/* This is an inline function, so we don't really care about a long
+ * list of arguments */
+static inline int
+__build_packet_message(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst,
+ const struct sk_buff *skb,
+ unsigned int data_len,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct net_device *indev,
+ const struct net_device *outdev,
+ const char *prefix, unsigned int plen,
+ const struct nfnl_ct_hook *nfnl_ct,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ struct nfulnl_msg_packet_hdr pmsg;
+ struct nlmsghdr *nlh;
+ sk_buff_data_t old_tail = inst->skb->tail;
+ struct sock *sk;
+ const unsigned char *hwhdrp;
+ ktime_t tstamp;
+
+ nlh = nfnl_msg_put(inst->skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
+ 0, pf, NFNETLINK_V0, htons(inst->group_num));
+ if (!nlh)
+ return -1;
+
+ memset(&pmsg, 0, sizeof(pmsg));
+ pmsg.hw_protocol = skb->protocol;
+ pmsg.hook = hooknum;
+
+ if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
+ goto nla_put_failure;
+
+ if (prefix &&
+ nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
+ goto nla_put_failure;
+
+ if (indev) {
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
+ htonl(indev->ifindex)))
+ goto nla_put_failure;
+#else
+ if (pf == PF_BRIDGE) {
+ /* Case 1: indev is the physical input device, we need to
+ * look up the bridge group (when called from
+ * netfilter_bridge) */
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+ htonl(indev->ifindex)) ||
+ /* this is the bridge group "brX" */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet.
+ */
+ nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
+ htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
+ goto nla_put_failure;
+ } else {
+ int physinif;
+
+ /* Case 2: indev is a bridge group, we need to look for
+ * the physical device (when called from ipv4) */
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
+ htonl(indev->ifindex)))
+ goto nla_put_failure;
+
+ physinif = nf_bridge_get_physinif(skb);
+ if (physinif &&
+ nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+ htonl(physinif)))
+ goto nla_put_failure;
+ }
+#endif
+ }
+
+ if (outdev) {
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ htonl(outdev->ifindex)))
+ goto nla_put_failure;
+#else
+ if (pf == PF_BRIDGE) {
+ /* Case 1: outdev is physical output device, we need to
+ * look for bridge group (when called from
+ * netfilter_bridge) */
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+ htonl(outdev->ifindex)) ||
+ /* this is the bridge group "brX" */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet.
+ */
+ nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
+ goto nla_put_failure;
+ } else {
+ struct net_device *physoutdev;
+
+ /* Case 2: outdev is a bridge group, we need to look
+ * for the physical output device (when called from ipv4) */
+ if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ htonl(outdev->ifindex)))
+ goto nla_put_failure;
+
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev &&
+ nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+ htonl(physoutdev->ifindex)))
+ goto nla_put_failure;
+ }
+#endif
+ }
+
+ if (skb->mark &&
+ nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
+ goto nla_put_failure;
+
+ if (indev && skb->dev &&
+ skb_mac_header_was_set(skb) &&
+ skb_mac_header_len(skb) != 0) {
+ struct nfulnl_msg_packet_hw phw;
+ int len;
+
+ memset(&phw, 0, sizeof(phw));
+ len = dev_parse_header(skb, phw.hw_addr);
+ if (len > 0) {
+ phw.hw_addrlen = htons(len);
+ if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
+ goto nla_put_failure;
+ }
+ }
+
+ if (indev && skb_mac_header_was_set(skb)) {
+ if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
+ nla_put_be16(inst->skb, NFULA_HWLEN,
+ htons(skb->dev->hard_header_len)))
+ goto nla_put_failure;
+
+ hwhdrp = skb_mac_header(skb);
+
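+ /* On 6in4 (SIT) devices the mac header offset points past the
+ * ethernet header; step back so it is included. The skb->head
+ * comparison below guards against underflow.
+ */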
+ if (skb->dev->type == ARPHRD_SIT)
+ hwhdrp -= ETH_HLEN;
+
+ if (hwhdrp >= skb->head &&
+ nla_put(inst->skb, NFULA_HWHEADER,
+ skb->dev->hard_header_len, hwhdrp))
+ goto nla_put_failure;
+ }
+
+ tstamp = skb_tstamp_cond(skb, false);
+ if (hooknum <= NF_INET_FORWARD && tstamp) {
+ struct nfulnl_msg_packet_timestamp ts;
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
+
+ if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
+ goto nla_put_failure;
+ }
+
+ /* UID */
+ sk = skb->sk;
+ if (sk && sk_fullsock(sk)) {
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ struct file *file = sk->sk_socket->file;
+ const struct cred *cred = file->f_cred;
+ struct user_namespace *user_ns = inst->peer_user_ns;
+ __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
+ __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
+ read_unlock_bh(&sk->sk_callback_lock);
+ if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
+ nla_put_be32(inst->skb, NFULA_GID, gid))
+ goto nla_put_failure;
+ } else
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+
+ /* local sequence number */
+ if ((inst->flags & NFULNL_CFG_F_SEQ) &&
+ nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
+ goto nla_put_failure;
+
+ /* global sequence number */
+ if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
+ nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
+ htonl(atomic_inc_return(&log->global_seq))))
+ goto nla_put_failure;
+
+ if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
+ NFULA_CT, NFULA_CT_INFO) < 0)
+ goto nla_put_failure;
+
+ if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
+ nfulnl_put_bridge(inst, skb) < 0)
+ goto nla_put_failure;
+
+ if (data_len) {
+ struct nlattr *nla;
+ int size = nla_attr_size(data_len);
+
+ if (skb_tailroom(inst->skb) < nla_total_size(data_len))
+ goto nla_put_failure;
+
+ nla = skb_put(inst->skb, nla_total_size(data_len));
+ nla->nla_type = NFULA_PAYLOAD;
+ nla->nla_len = size;
+
+ if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
+ BUG();
+ }
+
+ nlh->nlmsg_len = inst->skb->tail - old_tail;
+ return 0;
+
+nla_put_failure:
+ PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
+ return -1;
+}
+
+static const struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_ULOG,
+ .u = {
+ .ulog = {
+ .copy_len = 0xffff,
+ .group = 0,
+ .qthreshold = 1,
+ },
+ },
+};
+
+/* log handler for internal netfilter logging api */
+static void
+nfulnl_log_packet(struct net *net,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *li_user,
+ const char *prefix)
+{
+ size_t size;
+ unsigned int data_len;
+ struct nfulnl_instance *inst;
+ const struct nf_loginfo *li;
+ unsigned int qthreshold;
+ unsigned int plen = 0;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ const struct nfnl_ct_hook *nfnl_ct = NULL;
+ enum ip_conntrack_info ctinfo = 0;
+ struct nf_conn *ct = NULL;
+
+ if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
+ li = li_user;
+ else
+ li = &default_loginfo;
+
+ inst = instance_lookup_get(log, li->u.ulog.group);
+ if (!inst)
+ return;
+
+ if (prefix)
+ plen = strlen(prefix) + 1;
+
+ /* FIXME: do we want to make the size calculation conditional based on
+ * what is actually present? way more branches and checks, but more
+ * memory efficient... */
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+#endif
+ + nla_total_size(sizeof(u_int32_t)) /* mark */
+ + nla_total_size(sizeof(u_int32_t)) /* uid */
+ + nla_total_size(sizeof(u_int32_t)) /* gid */
+ + nla_total_size(plen) /* prefix */
+ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
+ + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+ + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
+
+ if (in && skb_mac_header_was_set(skb)) {
+ size += nla_total_size(skb->dev->hard_header_len)
+ + nla_total_size(sizeof(u_int16_t)) /* hwtype */
+ + nla_total_size(sizeof(u_int16_t)); /* hwlen */
+ }
+
+ spin_lock_bh(&inst->lock);
+
+ if (inst->flags & NFULNL_CFG_F_SEQ)
+ size += nla_total_size(sizeof(u_int32_t));
+ if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
+ size += nla_total_size(sizeof(u_int32_t));
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+ if (nfnl_ct != NULL) {
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
+#endif
+ if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
+ size += nfulnl_get_bridge_size(skb);
+
+ qthreshold = inst->qthreshold;
+ /* per-rule qthreshold overrides per-instance */
+ if (li->u.ulog.qthreshold)
+ if (qthreshold > li->u.ulog.qthreshold)
+ qthreshold = li->u.ulog.qthreshold;
+
+
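+	/* COPY_NONE/COPY_META log metadata only (no payload), COPY_PACKET
+	 * copies up to copy_range payload bytes, COPY_DISABLED suppresses
+	 * logging entirely. */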
+ switch (inst->copy_mode) {
+ case NFULNL_COPY_META:
+ case NFULNL_COPY_NONE:
+ data_len = 0;
+ break;
+
+ case NFULNL_COPY_PACKET:
+ data_len = inst->copy_range;
+ if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
+ (li->u.ulog.copy_len < data_len))
+ data_len = li->u.ulog.copy_len;
+
+ if (data_len > skb->len)
+ data_len = skb->len;
+
+ size += nla_total_size(data_len);
+ break;
+
+ case NFULNL_COPY_DISABLED:
+ default:
+ goto unlock_and_release;
+ }
+
+ if (inst->skb && size > skb_tailroom(inst->skb)) {
+		/* either the queue length is too high or there is not enough
+		 * room left in the skb; flush to userspace. */
+ __nfulnl_flush(inst);
+ }
+
+ if (!inst->skb) {
+ inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
+ inst->nlbufsiz, size);
+ if (!inst->skb)
+ goto alloc_failure;
+ }
+
+ inst->qlen++;
+
+ __build_packet_message(log, inst, skb, data_len, pf,
+ hooknum, in, out, prefix, plen,
+ nfnl_ct, ct, ctinfo);
+
+ if (inst->qlen >= qthreshold)
+ __nfulnl_flush(inst);
+	/* timer_pending() is always called with inst->lock held, so there
+	 * is no chance of a race here */
+ else if (!timer_pending(&inst->timer)) {
+ instance_get(inst);
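+		/* flushtimeout is in hundredths of a second, e.g. 150 -> 1.5 s */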
+ inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
+ add_timer(&inst->timer);
+ }
+
+unlock_and_release:
+ spin_unlock_bh(&inst->lock);
+ instance_put(inst);
+ return;
+
+alloc_failure:
+ /* FIXME: statistics */
+ goto unlock_and_release;
+}
+
+static int
+nfulnl_rcv_nl_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netlink_notify *n = ptr;
+ struct nfnl_log_net *log = nfnl_log_pernet(n->net);
+
+ if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
+ int i;
+
+ /* destroy all instances for this portid */
+ spin_lock_bh(&log->instances_lock);
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct hlist_node *t2;
+ struct nfulnl_instance *inst;
+ struct hlist_head *head = &log->instance_table[i];
+
+ hlist_for_each_entry_safe(inst, t2, head, hlist) {
+ if (n->portid == inst->peer_portid)
+ __instance_destroy(inst);
+ }
+ }
+ spin_unlock_bh(&log->instances_lock);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfulnl_rtnl_notifier = {
+ .notifier_call = nfulnl_rcv_nl_event,
+};
+
+static int nfulnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfula[])
+{
+ return -ENOTSUPP;
+}
+
+static struct nf_logger nfulnl_logger __read_mostly = {
+ .name = "nfnetlink_log",
+ .type = NF_LOG_TYPE_ULOG,
+ .logfn = nfulnl_log_packet,
+ .me = THIS_MODULE,
+};
+
+static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
+ [NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) },
+ [NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) },
+ [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 },
+ [NFULA_CFG_QTHRESH] = { .type = NLA_U32 },
+ [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 },
+ [NFULA_CFG_FLAGS] = { .type = NLA_U16 },
+};
+
+static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfula[])
+{
+ struct nfnl_log_net *log = nfnl_log_pernet(info->net);
+ u_int16_t group_num = ntohs(info->nfmsg->res_id);
+ struct nfulnl_msg_config_cmd *cmd = NULL;
+ struct nfulnl_instance *inst;
+ u16 flags = 0;
+ int ret = 0;
+
+ if (nfula[NFULA_CFG_CMD]) {
+ u_int8_t pf = info->nfmsg->nfgen_family;
+ cmd = nla_data(nfula[NFULA_CFG_CMD]);
+
+ /* Commands without queue context */
+ switch (cmd->command) {
+ case NFULNL_CFG_CMD_PF_BIND:
+ return nf_log_bind_pf(info->net, pf, &nfulnl_logger);
+ case NFULNL_CFG_CMD_PF_UNBIND:
+ nf_log_unbind_pf(info->net, pf);
+ return 0;
+ }
+ }
+
+ inst = instance_lookup_get(log, group_num);
+ if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
+ ret = -EPERM;
+ goto out_put;
+ }
+
+	/* Check whether we support these flags in the first place; the
+	 * dependencies must be present as well so that atomicity is not
+	 * broken.
+	 */
+ if (nfula[NFULA_CFG_FLAGS]) {
+ flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));
+
+ if ((flags & NFULNL_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_ULOG);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_ULOG);
+ if (rcu_access_pointer(nfnl_ct_hook)) {
+ ret = -EAGAIN;
+ goto out_put;
+ }
+#endif
+ ret = -EOPNOTSUPP;
+ goto out_put;
+ }
+ }
+
+ if (cmd != NULL) {
+ switch (cmd->command) {
+ case NFULNL_CFG_CMD_BIND:
+ if (inst) {
+ ret = -EBUSY;
+ goto out_put;
+ }
+
+ inst = instance_create(info->net, group_num,
+ NETLINK_CB(skb).portid,
+ sk_user_ns(NETLINK_CB(skb).sk));
+ if (IS_ERR(inst)) {
+ ret = PTR_ERR(inst);
+ goto out;
+ }
+ break;
+ case NFULNL_CFG_CMD_UNBIND:
+ if (!inst) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ instance_destroy(log, inst);
+ goto out_put;
+ default:
+ ret = -ENOTSUPP;
+ goto out_put;
+ }
+ } else if (!inst) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (nfula[NFULA_CFG_MODE]) {
+ struct nfulnl_msg_config_mode *params =
+ nla_data(nfula[NFULA_CFG_MODE]);
+
+ nfulnl_set_mode(inst, params->copy_mode,
+ ntohl(params->copy_range));
+ }
+
+ if (nfula[NFULA_CFG_TIMEOUT]) {
+ __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
+
+ nfulnl_set_timeout(inst, ntohl(timeout));
+ }
+
+ if (nfula[NFULA_CFG_NLBUFSIZ]) {
+ __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
+
+ nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
+ }
+
+ if (nfula[NFULA_CFG_QTHRESH]) {
+ __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
+
+ nfulnl_set_qthresh(inst, ntohl(qthresh));
+ }
+
+ if (nfula[NFULA_CFG_FLAGS])
+ nfulnl_set_flags(inst, flags);
+
+out_put:
+ instance_put(inst);
+out:
+ return ret;
+}
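+
+/*
+ * A minimal userspace sketch of the config handshake handled above,
+ * assuming libmnl (group number 1 is an arbitrary example value): it sends
+ * an NFULNL_MSG_CONFIG message carrying NFULA_CFG_CMD = NFULNL_CFG_CMD_BIND,
+ * which makes the code above call instance_create() for that group.
+ *
+ *	struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);
+ *	char buf[MNL_SOCKET_BUFFER_SIZE];
+ *	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ *	struct nfgenmsg *nfg;
+ *	struct nfulnl_msg_config_cmd cmd = { .command = NFULNL_CFG_CMD_BIND };
+ *
+ *	mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
+ *	nlh->nlmsg_type = (NFNL_SUBSYS_ULOG << 8) | NFULNL_MSG_CONFIG;
+ *	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ *	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ *	nfg->nfgen_family = AF_UNSPEC;
+ *	nfg->version = NFNETLINK_V0;
+ *	nfg->res_id = htons(1);			(the group number)
+ *	mnl_attr_put(nlh, NFULA_CFG_CMD, sizeof(cmd), &cmd);
+ *	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+ */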
+
+static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
+ [NFULNL_MSG_PACKET] = {
+ .call = nfulnl_recv_unsupp,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFULA_MAX,
+ },
+ [NFULNL_MSG_CONFIG] = {
+ .call = nfulnl_recv_config,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFULA_CFG_MAX,
+ .policy = nfula_cfg_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfulnl_subsys = {
+ .name = "log",
+ .subsys_id = NFNL_SUBSYS_ULOG,
+ .cb_count = NFULNL_MSG_MAX,
+ .cb = nfulnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+struct iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_node *get_first(struct net *net, struct iter_state *st)
+{
+ struct nfnl_log_net *log;
+ if (!st)
+ return NULL;
+
+ log = nfnl_log_pernet(net);
+
+ for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
+ struct hlist_head *head = &log->instance_table[st->bucket];
+
+ if (!hlist_empty(head))
+ return rcu_dereference_bh(hlist_first_rcu(head));
+ }
+ return NULL;
+}
+
+static struct hlist_node *get_next(struct net *net, struct iter_state *st,
+ struct hlist_node *h)
+{
+ h = rcu_dereference_bh(hlist_next_rcu(h));
+ while (!h) {
+ struct nfnl_log_net *log;
+ struct hlist_head *head;
+
+ if (++st->bucket >= INSTANCE_BUCKETS)
+ return NULL;
+
+ log = nfnl_log_pernet(net);
+ head = &log->instance_table[st->bucket];
+ h = rcu_dereference_bh(hlist_first_rcu(head));
+ }
+ return h;
+}
+
+static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
+ loff_t pos)
+{
+ struct hlist_node *head;
+ head = get_first(net, st);
+
+ if (head)
+ while (pos && (head = get_next(net, st, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(rcu_bh)
+{
+ rcu_read_lock_bh();
+ return get_idx(seq_file_net(s), s->private, *pos);
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return get_next(seq_file_net(s), s->private, v);
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+ __releases(rcu_bh)
+{
+ rcu_read_unlock_bh();
+}
+
+static int seq_show(struct seq_file *s, void *v)
+{
+ const struct nfulnl_instance *inst = v;
+
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
+ inst->group_num,
+ inst->peer_portid, inst->qlen,
+ inst->copy_mode, inst->copy_range,
+ inst->flushtimeout, refcount_read(&inst->use));
+
+ return 0;
+}
+
+static const struct seq_operations nful_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+#endif /* PROC_FS */
+
+static int __net_init nfnl_log_net_init(struct net *net)
+{
+ unsigned int i;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+#endif
+
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ INIT_HLIST_HEAD(&log->instance_table[i]);
+ spin_lock_init(&log->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter,
+ &nful_seq_ops, sizeof(struct iter_state));
+ if (!proc)
+ return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_log_net_exit(struct net *net)
+{
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ unsigned int i;
+
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
+#endif
+ nf_log_unset(net, &nfulnl_logger);
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
+}
+
+static struct pernet_operations nfnl_log_net_ops = {
+ .init = nfnl_log_net_init,
+ .exit = nfnl_log_net_exit,
+ .id = &nfnl_log_net_id,
+ .size = sizeof(struct nfnl_log_net),
+};
+
+static int __init nfnetlink_log_init(void)
+{
+ int status;
+
+ status = register_pernet_subsys(&nfnl_log_net_ops);
+ if (status < 0) {
+ pr_err("failed to register pernet ops\n");
+ goto out;
+ }
+
+ netlink_register_notifier(&nfulnl_rtnl_notifier);
+ status = nfnetlink_subsys_register(&nfulnl_subsys);
+ if (status < 0) {
+ pr_err("failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+ status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
+ if (status < 0) {
+ pr_err("failed to register logger\n");
+ goto cleanup_subsys;
+ }
+
+ return status;
+
+cleanup_subsys:
+ nfnetlink_subsys_unregister(&nfulnl_subsys);
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_log_net_ops);
+out:
+ return status;
+}
+
+static void __exit nfnetlink_log_fini(void)
+{
+ nfnetlink_subsys_unregister(&nfulnl_subsys);
+ netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_log_net_ops);
+ nf_log_unregister(&nfulnl_logger);
+}
+
+MODULE_DESCRIPTION("netfilter userspace logging");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
+MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
+MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
+MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */
+
+module_init(nfnetlink_log_init);
+module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
new file mode 100644
index 000000000..50723ba08
--- /dev/null
+++ b/net/netfilter/nfnetlink_osf.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/capability.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/nfnetlink_osf.h>
+
+/*
+ * Indexed by the don't-fragment (DF) bit, the only constant value in the
+ * fingerprint: [0] holds entries with DF clear, [1] entries with DF set.
+ */
+struct list_head nf_osf_fingers[2];
+EXPORT_SYMBOL_GPL(nf_osf_fingers);
+
+static inline int nf_osf_ttl(const struct sk_buff *skb,
+ int ttl_check, unsigned char f_ttl)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ const struct iphdr *ip = ip_hdr(skb);
+ const struct in_ifaddr *ifa;
+ int ret = 0;
+
+ if (ttl_check == NF_OSF_TTL_TRUE)
+ return ip->ttl == f_ttl;
+ if (ttl_check == NF_OSF_TTL_NOCHECK)
+ return 1;
+ else if (ip->ttl <= f_ttl)
+ return 1;
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (inet_ifa_match(ip->saddr, ifa)) {
+ ret = (ip->ttl == f_ttl);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+struct nf_osf_hdr_ctx {
+ bool df;
+ u16 window;
+ u16 totlen;
+ const unsigned char *optp;
+ unsigned int optsize;
+};
+
+static bool nf_osf_match_one(const struct sk_buff *skb,
+ const struct nf_osf_user_finger *f,
+ int ttl_check,
+ struct nf_osf_hdr_ctx *ctx)
+{
+ const __u8 *optpinit = ctx->optp;
+ unsigned int check_WSS = 0;
+ int fmatch = FMATCH_WRONG;
+ int foptsize, optnum;
+ u16 mss = 0;
+
+ if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl))
+ return false;
+
+	/*
+	 * This should not happen if the userspace parser was written
+	 * correctly.
+	 */
+ if (f->wss.wc >= OSF_WSS_MAX)
+ return false;
+
+ /* Check options */
+
+ foptsize = 0;
+ for (optnum = 0; optnum < f->opt_num; ++optnum)
+ foptsize += f->opt[optnum].length;
+
+ if (foptsize > MAX_IPOPTLEN ||
+ ctx->optsize > MAX_IPOPTLEN ||
+ ctx->optsize != foptsize)
+ return false;
+
+ check_WSS = f->wss.wc;
+
+ for (optnum = 0; optnum < f->opt_num; ++optnum) {
+ if (f->opt[optnum].kind == *ctx->optp) {
+ __u32 len = f->opt[optnum].length;
+ const __u8 *optend = ctx->optp + len;
+
+ fmatch = FMATCH_OK;
+
+ switch (*ctx->optp) {
+ case OSFOPT_MSS:
+ mss = ctx->optp[3];
+ mss <<= 8;
+ mss |= ctx->optp[2];
+
+ mss = ntohs((__force __be16)mss);
+ break;
+ case OSFOPT_TS:
+ break;
+ }
+
+ ctx->optp = optend;
+ } else
+ fmatch = FMATCH_OPT_WRONG;
+
+ if (fmatch != FMATCH_OK)
+ break;
+ }
+
+ if (fmatch != FMATCH_OPT_WRONG) {
+ fmatch = FMATCH_WRONG;
+
+ switch (check_WSS) {
+ case OSF_WSS_PLAIN:
+ if (f->wss.val == 0 || ctx->window == f->wss.val)
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MSS:
+			/*
+			 * Some smart modems mangle (decrease) the MSS to
+			 * SMART_MSS_2, so we check the standard value, the
+			 * decreased one and the MSS provided in the
+			 * fingerprint.
+			 */
+#define SMART_MSS_1 1460
+#define SMART_MSS_2 1448
+ if (ctx->window == f->wss.val * mss ||
+ ctx->window == f->wss.val * SMART_MSS_1 ||
+ ctx->window == f->wss.val * SMART_MSS_2)
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MTU:
+ if (ctx->window == f->wss.val * (mss + 40) ||
+ ctx->window == f->wss.val * (SMART_MSS_1 + 40) ||
+ ctx->window == f->wss.val * (SMART_MSS_2 + 40))
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MODULO:
+ if ((ctx->window % f->wss.val) == 0)
+ fmatch = FMATCH_OK;
+ break;
+ }
+ }
+
+ if (fmatch != FMATCH_OK)
+ ctx->optp = optpinit;
+
+ return fmatch == FMATCH_OK;
+}
+
+static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct iphdr *ip,
+ unsigned char *opts,
+ struct tcphdr *_tcph)
+{
+ const struct tcphdr *tcp;
+
+ tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph);
+ if (!tcp)
+ return NULL;
+
+ if (!tcp->syn)
+ return NULL;
+
+ ctx->totlen = ntohs(ip->tot_len);
+ ctx->df = ntohs(ip->frag_off) & IP_DF;
+ ctx->window = ntohs(tcp->window);
+
+ if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+ ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+ ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+ sizeof(struct tcphdr), ctx->optsize, opts);
+ if (!ctx->optp)
+ return NULL;
+ }
+
+ return tcp;
+}
+
+bool
+nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+ int hooknum, struct net_device *in, struct net_device *out,
+ const struct nf_osf_info *info, struct net *net,
+ const struct list_head *nf_osf_fingers)
+{
+ const struct iphdr *ip = ip_hdr(skb);
+ const struct nf_osf_user_finger *f;
+ unsigned char opts[MAX_IPOPTLEN];
+ const struct nf_osf_finger *kf;
+ int fcount = 0, ttl_check;
+ int fmatch = FMATCH_WRONG;
+ struct nf_osf_hdr_ctx ctx;
+ const struct tcphdr *tcp;
+ struct tcphdr _tcph;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
+ if (!tcp)
+ return false;
+
+ ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0;
+
+ list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
+
+ f = &kf->finger;
+
+ if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
+ continue;
+
+ if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
+ continue;
+
+ fmatch = FMATCH_OK;
+
+ fcount++;
+
+ if (info->flags & NF_OSF_LOG)
+ nf_log_packet(net, family, hooknum, skb,
+ in, out, NULL,
+ "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
+ f->genre, f->version, f->subtype,
+ &ip->saddr, ntohs(tcp->source),
+ &ip->daddr, ntohs(tcp->dest),
+ f->ttl - ip->ttl);
+
+ if ((info->flags & NF_OSF_LOG) &&
+ info->loglevel == NF_OSF_LOGLEVEL_FIRST)
+ break;
+ }
+
+ if (!fcount && (info->flags & NF_OSF_LOG))
+ nf_log_packet(net, family, hooknum, skb, in, out, NULL,
+ "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
+ &ip->saddr, ntohs(tcp->source),
+ &ip->daddr, ntohs(tcp->dest));
+
+ if (fcount)
+ fmatch = FMATCH_OK;
+
+ return fmatch == FMATCH_OK;
+}
+EXPORT_SYMBOL_GPL(nf_osf_match);
+
+bool nf_osf_find(const struct sk_buff *skb,
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check, struct nf_osf_data *data)
+{
+ const struct iphdr *ip = ip_hdr(skb);
+ const struct nf_osf_user_finger *f;
+ unsigned char opts[MAX_IPOPTLEN];
+ const struct nf_osf_finger *kf;
+ struct nf_osf_hdr_ctx ctx;
+ const struct tcphdr *tcp;
+ struct tcphdr _tcph;
+ bool found = false;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
+ if (!tcp)
+ return false;
+
+ list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
+ f = &kf->finger;
+ if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
+ continue;
+
+ data->genre = f->genre;
+ data->version = f->version;
+ found = true;
+ break;
+ }
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(nf_osf_find);
+
+static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = {
+ [OSF_ATTR_FINGER] = { .len = sizeof(struct nf_osf_user_finger) },
+};
+
+static int nfnl_osf_add_callback(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const osf_attrs[])
+{
+ struct nf_osf_user_finger *f;
+ struct nf_osf_finger *kf = NULL, *sf;
+ int err = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!osf_attrs[OSF_ATTR_FINGER])
+ return -EINVAL;
+
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ return -EINVAL;
+
+ f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
+ kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
+ if (!kf)
+ return -ENOMEM;
+
+ memcpy(&kf->finger, f, sizeof(struct nf_osf_user_finger));
+
+ list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) {
+ if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger)))
+ continue;
+
+ kfree(kf);
+ kf = NULL;
+
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
+ err = -EEXIST;
+ break;
+ }
+
+	/*
+	 * We are protected by the nfnl mutex.
+	 */
+ if (kf)
+ list_add_tail_rcu(&kf->finger_entry, &nf_osf_fingers[!!f->df]);
+
+ return err;
+}
+
+static int nfnl_osf_remove_callback(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const osf_attrs[])
+{
+ struct nf_osf_user_finger *f;
+ struct nf_osf_finger *sf;
+ int err = -ENOENT;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!osf_attrs[OSF_ATTR_FINGER])
+ return -EINVAL;
+
+ f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+ list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) {
+ if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger)))
+ continue;
+
+		/*
+		 * We are protected by the nfnl mutex.
+		 */
+ list_del_rcu(&sf->finger_entry);
+ kfree_rcu(sf, rcu_head);
+
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static const struct nfnl_callback nfnl_osf_callbacks[OSF_MSG_MAX] = {
+ [OSF_MSG_ADD] = {
+ .call = nfnl_osf_add_callback,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = nfnl_osf_policy,
+ },
+ [OSF_MSG_REMOVE] = {
+ .call = nfnl_osf_remove_callback,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = nfnl_osf_policy,
+ },
+};
+
+static const struct nfnetlink_subsystem nfnl_osf_subsys = {
+ .name = "osf",
+ .subsys_id = NFNL_SUBSYS_OSF,
+ .cb_count = OSF_MSG_MAX,
+ .cb = nfnl_osf_callbacks,
+};
+
+static int __init nfnl_osf_init(void)
+{
+ int err = -EINVAL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i)
+ INIT_LIST_HEAD(&nf_osf_fingers[i]);
+
+ err = nfnetlink_subsys_register(&nfnl_osf_subsys);
+ if (err < 0) {
+ pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err);
+ goto err_out_exit;
+ }
+ return 0;
+
+err_out_exit:
+ return err;
+}
+
+static void __exit nfnl_osf_fini(void)
+{
+ struct nf_osf_finger *f;
+ int i;
+
+ nfnetlink_subsys_unregister(&nfnl_osf_subsys);
+
+ rcu_read_lock();
+ for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i) {
+ list_for_each_entry_rcu(f, &nf_osf_fingers[i], finger_entry) {
+ list_del_rcu(&f->finger_entry);
+ kfree_rcu(f, rcu_head);
+ }
+ }
+ rcu_read_unlock();
+
+ rcu_barrier();
+}
+
+module_init(nfnl_osf_init);
+module_exit(nfnl_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 000000000..87a9009d5
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1617 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is a module which is used for queueing packets and communicating with
+ * userspace via nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2007 by Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the old ipv4-only ip_queue.c:
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/proc_fs.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/netns/generic.h>
+
+#include <linux/atomic.h>
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+#include "../bridge/br_private.h"
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+#define NFQNL_QMAX_DEFAULT 1024
+
+/* We're using struct nlattr, which has a 16-bit nla_len. Note that nla_len
+ * includes the header length. Thus, the maximum packet length that we
+ * support is 65531 bytes. We send truncated packets if the specified length
+ * is larger than that. Userspace can check for the presence of the
+ * NFQA_CAP_LEN attribute to detect truncation.
+ */
+#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
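+/* Worked out: NLA_HDRLEN = NLA_ALIGN(sizeof(struct nlattr)) = 4, so the
+ * cap is 65535 - 4 = 65531 bytes of payload. */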
+
+struct nfqnl_instance {
+ struct hlist_node hlist; /* global list of queues */
+ struct rcu_head rcu;
+
+ u32 peer_portid;
+ unsigned int queue_maxlen;
+ unsigned int copy_range;
+ unsigned int queue_dropped;
+ unsigned int queue_user_dropped;
+
+
+ u_int16_t queue_num; /* number of this queue */
+ u_int8_t copy_mode;
+ u_int32_t flags; /* Set using NFQA_CFG_FLAGS */
+/*
+ * The following fields are dirtied for each queued packet;
+ * keep them in the same cache line if possible.
+ */
+ spinlock_t lock ____cacheline_aligned_in_smp;
+ unsigned int queue_total;
+ unsigned int id_sequence; /* 'sequence' of pkt ids */
+ struct list_head queue_list; /* packets in queue */
+};
+
+typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
+
+static unsigned int nfnl_queue_net_id __read_mostly;
+
+#define INSTANCE_BUCKETS 16
+struct nfnl_queue_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+};
+
+static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_queue_net_id);
+}
+
+static inline u_int8_t instance_hashfn(u_int16_t queue_num)
+{
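+	/* e.g. queue_num 0x1234: (0x12 ^ 0x1234) % 16 = 0x1226 % 16 = 6 */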
+ return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
+}
+
+static struct nfqnl_instance *
+instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
+{
+ struct hlist_head *head;
+ struct nfqnl_instance *inst;
+
+ head = &q->instance_table[instance_hashfn(queue_num)];
+ hlist_for_each_entry_rcu(inst, head, hlist) {
+ if (inst->queue_num == queue_num)
+ return inst;
+ }
+ return NULL;
+}
+
+static struct nfqnl_instance *
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
+{
+ struct nfqnl_instance *inst;
+ unsigned int h;
+ int err;
+
+ spin_lock(&q->instances_lock);
+ if (instance_lookup(q, queue_num)) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
+ if (!inst) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ inst->queue_num = queue_num;
+ inst->peer_portid = portid;
+ inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
+ inst->copy_range = NFQNL_MAX_COPY_RANGE;
+ inst->copy_mode = NFQNL_COPY_NONE;
+ spin_lock_init(&inst->lock);
+ INIT_LIST_HEAD(&inst->queue_list);
+
+ if (!try_module_get(THIS_MODULE)) {
+ err = -EAGAIN;
+ goto out_free;
+ }
+
+ h = instance_hashfn(queue_num);
+ hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
+
+ spin_unlock(&q->instances_lock);
+
+ return inst;
+
+out_free:
+ kfree(inst);
+out_unlock:
+ spin_unlock(&q->instances_lock);
+ return ERR_PTR(err);
+}
+
+static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
+ unsigned long data);
+
+static void
+instance_destroy_rcu(struct rcu_head *head)
+{
+ struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
+ rcu);
+
+ nfqnl_flush(inst, NULL, 0);
+ kfree(inst);
+ module_put(THIS_MODULE);
+}
+
+static void
+__instance_destroy(struct nfqnl_instance *inst)
+{
+ hlist_del_rcu(&inst->hlist);
+ call_rcu(&inst->rcu, instance_destroy_rcu);
+}
+
+static void
+instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
+{
+ spin_lock(&q->instances_lock);
+ __instance_destroy(inst);
+ spin_unlock(&q->instances_lock);
+}
+
+static inline void
+__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
+{
+ list_add_tail(&entry->list, &queue->queue_list);
+ queue->queue_total++;
+}
+
+static void
+__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
+{
+ list_del(&entry->list);
+ queue->queue_total--;
+}
+
+static struct nf_queue_entry *
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
+{
+ struct nf_queue_entry *entry = NULL, *i;
+
+ spin_lock_bh(&queue->lock);
+
+ list_for_each_entry(i, &queue->queue_list, list) {
+ if (i->id == id) {
+ entry = i;
+ break;
+ }
+ }
+
+ if (entry)
+ __dequeue_entry(queue, entry);
+
+ spin_unlock_bh(&queue->lock);
+
+ return entry;
+}
+
+static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+{
+ const struct nf_ct_hook *ct_hook;
+ int err;
+
+ if (verdict == NF_ACCEPT ||
+ verdict == NF_REPEAT ||
+ verdict == NF_STOP) {
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook) {
+ err = ct_hook->update(entry->state.net, entry->skb);
+ if (err < 0)
+ verdict = NF_DROP;
+ }
+ rcu_read_unlock();
+ }
+ nf_reinject(entry, verdict);
+}
+
+static void
+nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
+{
+ struct nf_queue_entry *entry, *next;
+
+ spin_lock_bh(&queue->lock);
+ list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
+ if (!cmpfn || cmpfn(entry, data)) {
+ list_del(&entry->list);
+ queue->queue_total--;
+ nfqnl_reinject(entry, NF_DROP);
+ }
+ }
+ spin_unlock_bh(&queue->lock);
+}
+
+static int
+nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
+ bool csum_verify)
+{
+ __u32 flags = 0;
+
+ if (packet->ip_summed == CHECKSUM_PARTIAL)
+ flags = NFQA_SKB_CSUMNOTREADY;
+ else if (csum_verify)
+ flags = NFQA_SKB_CSUM_NOTVERIFIED;
+
+ if (skb_is_gso(packet))
+ flags |= NFQA_SKB_GSO;
+
+ return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
+}
+
+static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
+{
+ const struct cred *cred;
+
+ if (!sk_fullsock(sk))
+ return 0;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ cred = sk->sk_socket->file->f_cred;
+ if (nla_put_be32(skb, NFQA_UID,
+ htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFQA_GID,
+ htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
+ goto nla_put_failure;
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+ return 0;
+
+nla_put_failure:
+ read_unlock_bh(&sk->sk_callback_lock);
+ return -1;
+}
+
+static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
+{
+ u32 seclen = 0;
+#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (!skb || !sk_fullsock(skb->sk))
+ return 0;
+
+ read_lock_bh(&skb->sk->sk_callback_lock);
+
+ if (skb->secmark)
+ security_secid_to_secctx(skb->secmark, secdata, &seclen);
+
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+#endif
+ return seclen;
+}
+
+static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
+{
+ struct sk_buff *entskb = entry->skb;
+ u32 nlalen = 0;
+
+ if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
+ return 0;
+
+ if (skb_vlan_tag_present(entskb))
+ nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
+ nla_total_size(sizeof(__be16)));
+
+ if (entskb->network_header > entskb->mac_header)
+ nlalen += nla_total_size((entskb->network_header -
+ entskb->mac_header));
+
+ return nlalen;
+}
+
+static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
+{
+ struct sk_buff *entskb = entry->skb;
+
+ if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
+ return 0;
+
+ if (skb_vlan_tag_present(entskb)) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFQA_VLAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) ||
+ nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ }
+
+ if (entskb->mac_header < entskb->network_header) {
+ int len = (int)(entskb->network_header - entskb->mac_header);
+
+ if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct sk_buff *
+nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ struct nf_queue_entry *entry,
+ __be32 **packet_id_ptr)
+{
+ size_t size;
+ size_t data_len = 0, cap_len = 0;
+ unsigned int hlen = 0;
+ struct sk_buff *skb;
+ struct nlattr *nla;
+ struct nfqnl_msg_packet_hdr *pmsg;
+ struct nlmsghdr *nlh;
+ struct sk_buff *entskb = entry->skb;
+ struct net_device *indev;
+ struct net_device *outdev;
+ struct nf_conn *ct = NULL;
+ enum ip_conntrack_info ctinfo = 0;
+ const struct nfnl_ct_hook *nfnl_ct;
+ bool csum_verify;
+ char *secdata = NULL;
+ u32 seclen = 0;
+ ktime_t tstamp;
+
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ + nla_total_size(sizeof(u_int32_t)) /* ifindex */
+#endif
+ + nla_total_size(sizeof(u_int32_t)) /* mark */
+ + nla_total_size(sizeof(u_int32_t)) /* priority */
+ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ + nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ + nla_total_size(sizeof(u_int32_t)); /* cap_len */
+
+ tstamp = skb_tstamp_cond(entskb, false);
+ if (tstamp)
+ size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
+
+ size += nfqnl_get_bridge_size(entry);
+
+ if (entry->state.hook <= NF_INET_FORWARD ||
+ (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+ csum_verify = !skb_csum_unnecessary(entskb);
+ else
+ csum_verify = false;
+
+ outdev = entry->state.out;
+
+ switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
+ case NFQNL_COPY_META:
+ case NFQNL_COPY_NONE:
+ break;
+
+ case NFQNL_COPY_PACKET:
+ if (!(queue->flags & NFQA_CFG_F_GSO) &&
+ entskb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(entskb))
+ return NULL;
+
+ data_len = READ_ONCE(queue->copy_range);
+ if (data_len > entskb->len)
+ data_len = entskb->len;
+
+ hlen = skb_zerocopy_headlen(entskb);
+ hlen = min_t(unsigned int, hlen, data_len);
+ size += sizeof(struct nlattr) + hlen;
+ cap_len = entskb->len;
+ break;
+ }
+
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (queue->flags & NFQA_CFG_F_CONNTRACK) {
+ if (nfnl_ct != NULL) {
+ ct = nf_ct_get(entskb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
+#endif
+
+ if (queue->flags & NFQA_CFG_F_UID_GID) {
+ size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ + nla_total_size(sizeof(u_int32_t))); /* gid */
+ }
+
+ if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
+ seclen = nfqnl_get_sk_secctx(entskb, &secdata);
+ if (seclen)
+ size += nla_total_size(seclen);
+ }
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb) {
+ skb_tx_error(entskb);
+ goto nlmsg_failure;
+ }
+
+ nlh = nfnl_msg_put(skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
+ 0, entry->state.pf, NFNETLINK_V0,
+ htons(queue->queue_num));
+ if (!nlh) {
+ skb_tx_error(entskb);
+ kfree_skb(skb);
+ goto nlmsg_failure;
+ }
+
+ nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
+ pmsg = nla_data(nla);
+ pmsg->hw_protocol = entskb->protocol;
+ pmsg->hook = entry->state.hook;
+ *packet_id_ptr = &pmsg->packet_id;
+
+ indev = entry->state.in;
+ if (indev) {
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
+ goto nla_put_failure;
+#else
+ if (entry->state.pf == PF_BRIDGE) {
+ /* Case 1: indev is physical input device, we need to
+ * look for bridge group (when called from
+ * netfilter_bridge) */
+ if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
+ htonl(indev->ifindex)) ||
+ /* this is the bridge group "brX" */
+ /* rcu_read_lock()ed by __nf_queue */
+ nla_put_be32(skb, NFQA_IFINDEX_INDEV,
+ htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
+ goto nla_put_failure;
+ } else {
+ int physinif;
+
+ /* Case 2: indev is bridge group, we need to look for
+ * physical device (when called from ipv4) */
+ if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
+ htonl(indev->ifindex)))
+ goto nla_put_failure;
+
+ physinif = nf_bridge_get_physinif(entskb);
+ if (physinif &&
+ nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
+ htonl(physinif)))
+ goto nla_put_failure;
+ }
+#endif
+ }
+
+ if (outdev) {
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
+ goto nla_put_failure;
+#else
+ if (entry->state.pf == PF_BRIDGE) {
+ /* Case 1: outdev is physical output device, we need to
+ * look for bridge group (when called from
+ * netfilter_bridge) */
+ if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+ htonl(outdev->ifindex)) ||
+ /* this is the bridge group "brX" */
+ /* rcu_read_lock()ed by __nf_queue */
+ nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
+ htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
+ goto nla_put_failure;
+ } else {
+ int physoutif;
+
+ /* Case 2: outdev is bridge group, we need to look for
+ * physical output device (when called from ipv4) */
+ if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
+ htonl(outdev->ifindex)))
+ goto nla_put_failure;
+
+ physoutif = nf_bridge_get_physoutif(entskb);
+ if (physoutif &&
+ nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+ htonl(physoutif)))
+ goto nla_put_failure;
+ }
+#endif
+ }
+
+ if (entskb->mark &&
+ nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
+ goto nla_put_failure;
+
+ if (entskb->priority &&
+ nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
+ goto nla_put_failure;
+
+ if (indev && entskb->dev &&
+ skb_mac_header_was_set(entskb) &&
+ skb_mac_header_len(entskb) != 0) {
+ struct nfqnl_msg_packet_hw phw;
+ int len;
+
+ memset(&phw, 0, sizeof(phw));
+ len = dev_parse_header(entskb, phw.hw_addr);
+ if (len) {
+ phw.hw_addrlen = htons(len);
+ if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
+ goto nla_put_failure;
+ }
+ }
+
+ if (nfqnl_put_bridge(entry, skb) < 0)
+ goto nla_put_failure;
+
+ if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
+ struct nfqnl_msg_packet_timestamp ts;
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
+
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
+
+ if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
+ goto nla_put_failure;
+ }
+
+ if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
+ nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
+ goto nla_put_failure;
+
+ if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
+ goto nla_put_failure;
+
+ if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
+ goto nla_put_failure;
+
+ if (cap_len > data_len &&
+ nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+ goto nla_put_failure;
+
+ if (nfqnl_put_packet_info(skb, entskb, csum_verify))
+ goto nla_put_failure;
+
+ if (data_len) {
+ struct nlattr *nla;
+
+ if (skb_tailroom(skb) < sizeof(*nla) + hlen)
+ goto nla_put_failure;
+
+ nla = skb_put(skb, sizeof(*nla));
+ nla->nla_type = NFQA_PAYLOAD;
+ nla->nla_len = nla_attr_size(data_len);
+
+ if (skb_zerocopy(skb, entskb, data_len, hlen))
+ goto nla_put_failure;
+ }
+
+ nlh->nlmsg_len = skb->len;
+ if (seclen)
+ security_release_secctx(secdata, seclen);
+ return skb;
+
+nla_put_failure:
+ skb_tx_error(entskb);
+ kfree_skb(skb);
+ net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+ if (seclen)
+ security_release_secctx(secdata, seclen);
+ return NULL;
+}
+
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+ const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
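+	/* dying but never confirmed: the entry is already being torn down */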
+ if (ct && ((ct->status & flags) == IPS_DYING))
+ return true;
+#endif
+ return false;
+}
+
+static int
+__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
+ struct nf_queue_entry *entry)
+{
+ struct sk_buff *nskb;
+ int err = -ENOBUFS;
+ __be32 *packet_id_ptr;
+ int failopen = 0;
+
+ nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
+ if (nskb == NULL) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ spin_lock_bh(&queue->lock);
+
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
+ if (queue->queue_total >= queue->queue_maxlen) {
+ if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+ failopen = 1;
+ err = 0;
+ } else {
+ queue->queue_dropped++;
+ net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
+ queue->queue_total);
+ }
+ goto err_out_free_nskb;
+ }
+ entry->id = ++queue->id_sequence;
+ *packet_id_ptr = htonl(entry->id);
+
+ /* nfnetlink_unicast will either free the nskb or add it to a socket */
+ err = nfnetlink_unicast(nskb, net, queue->peer_portid);
+ if (err < 0) {
+ if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+ failopen = 1;
+ err = 0;
+ } else {
+ queue->queue_user_dropped++;
+ }
+ goto err_out_unlock;
+ }
+
+ __enqueue_entry(queue, entry);
+
+ spin_unlock_bh(&queue->lock);
+ return 0;
+
+err_out_free_nskb:
+ kfree_skb(nskb);
+err_out_unlock:
+ spin_unlock_bh(&queue->lock);
+ if (failopen)
+ nfqnl_reinject(entry, NF_ACCEPT);
+err_out:
+ return err;
+}
+
+static struct nf_queue_entry *
+nf_queue_entry_dup(struct nf_queue_entry *e)
+{
+ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
+}
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+/* When called from bridge netfilter, skb->data must point to the MAC header
+ * before calling skb_gso_segment(). Otherwise, the original MAC header is
+ * lost and segmented skbs will be sent to the wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+ if (nf_bridge_info_get(skb))
+ __skb_push(skb, skb->network_header - skb->mac_header);
+}
+
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+ if (nf_bridge_info_get(skb))
+ __skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
+
+static int
+__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
+ struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ int ret = -ENOMEM;
+ struct nf_queue_entry *entry_seg;
+
+ nf_bridge_adjust_segmented_data(skb);
+
+ if (skb->next == NULL) { /* last packet, no need to copy entry */
+ struct sk_buff *gso_skb = entry->skb;
+ entry->skb = skb;
+ ret = __nfqnl_enqueue_packet(net, queue, entry);
+ if (ret)
+ entry->skb = gso_skb;
+ return ret;
+ }
+
+ skb_mark_not_on_list(skb);
+
+ entry_seg = nf_queue_entry_dup(entry);
+ if (entry_seg) {
+ entry_seg->skb = skb;
+ ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
+ if (ret)
+ nf_queue_entry_free(entry_seg);
+ }
+ return ret;
+}
+
+static int
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+ unsigned int queued;
+ struct nfqnl_instance *queue;
+ struct sk_buff *skb, *segs, *nskb;
+ int err = -ENOBUFS;
+ struct net *net = entry->state.net;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ queue = instance_lookup(q, queuenum);
+ if (!queue)
+ return -ESRCH;
+
+ if (queue->copy_mode == NFQNL_COPY_NONE)
+ return -EINVAL;
+
+ skb = entry->skb;
+
+ switch (entry->state.pf) {
+ case NFPROTO_IPV4:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case NFPROTO_IPV6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
+
+ if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+ return __nfqnl_enqueue_packet(net, queue, entry);
+
+ nf_bridge_adjust_skb_data(skb);
+ segs = skb_gso_segment(skb, 0);
+ /* Does not use PTR_ERR to limit the number of error codes that can be
+ * returned by nf_queue. For instance, callers rely on -ESRCH to
+ * mean 'ignore this hook'.
+ */
+ if (IS_ERR_OR_NULL(segs))
+ goto out_err;
+ queued = 0;
+ err = 0;
+ skb_list_walk_safe(segs, segs, nskb) {
+ if (err == 0)
+ err = __nfqnl_enqueue_packet_gso(net, queue,
+ segs, entry);
+ if (err == 0)
+ queued++;
+ else
+ kfree_skb(segs);
+ }
+
+ if (queued) {
+ if (err) /* some segments are already queued */
+ nf_queue_entry_free(entry);
+ kfree_skb(skb);
+ return 0;
+ }
+ out_err:
+ nf_bridge_adjust_segmented_data(skb);
+ return err;
+}
+
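+/* Replace the payload of the queued skb with the data_len bytes at @data.
+ * @diff is data_len minus the current skb length: a negative value trims
+ * the skb, a positive one grows it (reallocating if the tailroom is too
+ * small). For example, replacing a 100-byte packet with a 120-byte payload
+ * gives diff = 20.
+ */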
+static int
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+{
+ struct sk_buff *nskb;
+
+ if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+ if (data_len > 0xFFFF)
+ return -EINVAL;
+ if (diff > skb_tailroom(e->skb)) {
+ nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
+ diff, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+ kfree_skb(e->skb);
+ e->skb = nskb;
+ }
+ skb_put(e->skb, diff);
+ }
+ if (skb_ensure_writable(e->skb, data_len))
+ return -ENOMEM;
+ skb_copy_to_linear_data(e->skb, data, data_len);
+ e->skb->ip_summed = CHECKSUM_NONE;
+ return 0;
+}
+
+static int
+nfqnl_set_mode(struct nfqnl_instance *queue,
+ unsigned char mode, unsigned int range)
+{
+ int status = 0;
+
+ spin_lock_bh(&queue->lock);
+ switch (mode) {
+ case NFQNL_COPY_NONE:
+ case NFQNL_COPY_META:
+ queue->copy_mode = mode;
+ queue->copy_range = 0;
+ break;
+
+ case NFQNL_COPY_PACKET:
+ queue->copy_mode = mode;
+ if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
+ queue->copy_range = NFQNL_MAX_COPY_RANGE;
+ else
+ queue->copy_range = range;
+ break;
+
+ default:
+ status = -EINVAL;
+
+ }
+ spin_unlock_bh(&queue->lock);
+
+ return status;
+}
+
+static int
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
+{
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ int physinif, physoutif;
+
+ physinif = nf_bridge_get_physinif(entry->skb);
+ physoutif = nf_bridge_get_physoutif(entry->skb);
+
+ if (physinif == ifindex || physoutif == ifindex)
+ return 1;
+#endif
+ if (entry->state.in)
+ if (entry->state.in->ifindex == ifindex)
+ return 1;
+ if (entry->state.out)
+ if (entry->state.out->ifindex == ifindex)
+ return 1;
+
+ return 0;
+}
+
+/* drop all packets with either indev or outdev == ifindex from all queue
+ * instances */
+static void
+nfqnl_dev_drop(struct net *net, int ifindex)
+{
+ int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ rcu_read_lock();
+
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+ hlist_for_each_entry_rcu(inst, head, hlist)
+ nfqnl_flush(inst, dev_cmp, ifindex);
+ }
+
+ rcu_read_unlock();
+}
+
+static int
+nfqnl_rcv_dev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ nfqnl_dev_drop(dev_net(dev), dev->ifindex);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfqnl_dev_notifier = {
+ .notifier_call = nfqnl_rcv_dev_event,
+};
+
+static void nfqnl_nf_hook_drop(struct net *net)
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ int i;
+
+	/* This function is also called on net namespace error unwind,
+	 * when pernet_ops->init() failed and the ->exit() functions of
+	 * the previous pernet_ops get called.
+	 *
+	 * This may result in a call to nfqnl_nf_hook_drop() before
+	 * struct nfnl_queue_net has been allocated.
+	 */
+ if (!q)
+ return;
+
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+ hlist_for_each_entry_rcu(inst, head, hlist)
+ nfqnl_flush(inst, NULL, 0);
+ }
+}
+
+static int
+nfqnl_rcv_nl_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netlink_notify *n = ptr;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
+
+ if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
+ int i;
+
+ /* destroy all instances for this portid */
+ spin_lock(&q->instances_lock);
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct hlist_node *t2;
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+ hlist_for_each_entry_safe(inst, t2, head, hlist) {
+ if (n->portid == inst->peer_portid)
+ __instance_destroy(inst);
+ }
+ }
+ spin_unlock(&q->instances_lock);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfqnl_rtnl_notifier = {
+ .notifier_call = nfqnl_rcv_nl_event,
+};
+
+static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = {
+ [NFQA_VLAN_TCI] = { .type = NLA_U16},
+ [NFQA_VLAN_PROTO] = { .type = NLA_U16},
+};
+
+static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
+ [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
+ [NFQA_MARK] = { .type = NLA_U32 },
+ [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
+ [NFQA_CT] = { .type = NLA_UNSPEC },
+ [NFQA_EXP] = { .type = NLA_UNSPEC },
+ [NFQA_VLAN] = { .type = NLA_NESTED },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
+ [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
+ [NFQA_MARK] = { .type = NLA_U32 },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
+};
+
+static struct nfqnl_instance *
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
+{
+ struct nfqnl_instance *queue;
+
+ queue = instance_lookup(q, queue_num);
+ if (!queue)
+ return ERR_PTR(-ENODEV);
+
+ if (queue->peer_portid != nlportid)
+ return ERR_PTR(-EPERM);
+
+ return queue;
+}
+
+static struct nfqnl_msg_verdict_hdr*
+verdicthdr_get(const struct nlattr * const nfqa[])
+{
+ struct nfqnl_msg_verdict_hdr *vhdr;
+ unsigned int verdict;
+
+ if (!nfqa[NFQA_VERDICT_HDR])
+ return NULL;
+
+ vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
+ verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
+ if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
+ return NULL;
+ return vhdr;
+}
+
+static int nfq_id_after(unsigned int id, unsigned int max)
+{
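+	/* Wraparound-safe "id > max": e.g. id = 5, max = 0xfffffffe gives
+	 * (int)(5 - 0xfffffffe) = 7 > 0, so 5 is "after" max even though the
+	 * sequence counter has wrapped; id = 3, max = 10 gives -7, so false.
+	 */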
+ return (int)(id - max) > 0;
+}
+
+static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ u16 queue_num = ntohs(info->nfmsg->res_id);
+ struct nf_queue_entry *entry, *tmp;
+ struct nfqnl_msg_verdict_hdr *vhdr;
+ struct nfqnl_instance *queue;
+ unsigned int verdict, maxid;
+ LIST_HEAD(batch_list);
+
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(queue))
+ return PTR_ERR(queue);
+
+ vhdr = verdicthdr_get(nfqa);
+ if (!vhdr)
+ return -EINVAL;
+
+ verdict = ntohl(vhdr->verdict);
+ maxid = ntohl(vhdr->id);
+
+ spin_lock_bh(&queue->lock);
+
+ list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
+ if (nfq_id_after(entry->id, maxid))
+ break;
+ __dequeue_entry(queue, entry);
+ list_add_tail(&entry->list, &batch_list);
+ }
+
+ spin_unlock_bh(&queue->lock);
+
+ if (list_empty(&batch_list))
+ return -ENOENT;
+
+ list_for_each_entry_safe(entry, tmp, &batch_list, list) {
+ if (nfqa[NFQA_MARK])
+ entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
+ nfqnl_reinject(entry, verdict);
+ }
+ return 0;
+}
+
+static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[],
+ struct nf_queue_entry *entry,
+ enum ip_conntrack_info *ctinfo)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(entry->skb, ctinfo);
+ if (ct == NULL)
+ return NULL;
+
+ if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
+ return NULL;
+
+ if (nfqa[NFQA_EXP])
+ nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
+ NETLINK_CB(entry->skb).portid,
+ nlmsg_report(nlh));
+ return ct;
+#else
+ return NULL;
+#endif
+}
+
+static int nfqa_parse_bridge(struct nf_queue_entry *entry,
+ const struct nlattr * const nfqa[])
+{
+ if (nfqa[NFQA_VLAN]) {
+ struct nlattr *tb[NFQA_VLAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX,
+ nfqa[NFQA_VLAN],
+ nfqa_vlan_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
+ return -EINVAL;
+
+ __vlan_hwaccel_put_tag(entry->skb,
+ nla_get_be16(tb[NFQA_VLAN_PROTO]),
+ ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
+ }
+
+ if (nfqa[NFQA_L2HDR]) {
+ int mac_header_len = entry->skb->network_header -
+ entry->skb->mac_header;
+
+ if (mac_header_len != nla_len(nfqa[NFQA_L2HDR]))
+ return -EINVAL;
+ else if (mac_header_len > 0)
+ memcpy(skb_mac_header(entry->skb),
+ nla_data(nfqa[NFQA_L2HDR]),
+ mac_header_len);
+ }
+
+ return 0;
+}
+
+static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ u_int16_t queue_num = ntohs(info->nfmsg->res_id);
+ const struct nfnl_ct_hook *nfnl_ct;
+ struct nfqnl_msg_verdict_hdr *vhdr;
+ enum ip_conntrack_info ctinfo;
+ struct nfqnl_instance *queue;
+ struct nf_queue_entry *entry;
+ struct nf_conn *ct = NULL;
+ unsigned int verdict;
+ int err;
+
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(queue))
+ return PTR_ERR(queue);
+
+ vhdr = verdicthdr_get(nfqa);
+ if (!vhdr)
+ return -EINVAL;
+
+ verdict = ntohl(vhdr->verdict);
+
+ entry = find_dequeue_entry(queue, ntohl(vhdr->id));
+ if (entry == NULL)
+ return -ENOENT;
+
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
+ if (nfqa[NFQA_CT]) {
+ if (nfnl_ct != NULL)
+ ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
+ &ctinfo);
+ }
+
+ if (entry->state.pf == PF_BRIDGE) {
+ err = nfqa_parse_bridge(entry, nfqa);
+ if (err < 0)
+ return err;
+ }
+
+ if (nfqa[NFQA_PAYLOAD]) {
+ u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
+ int diff = payload_len - entry->skb->len;
+
+ if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
+ payload_len, entry, diff) < 0)
+ verdict = NF_DROP;
+
+ if (ct && diff)
+ nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
+ }
+
+ if (nfqa[NFQA_MARK])
+ entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
+ nfqnl_reinject(entry, verdict);
+ return 0;
+}
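+
+/*
+ * A minimal userspace sketch of issuing the verdict handled above, assuming
+ * libmnl, an open and bound NETLINK_NETFILTER socket nl, and a packet_id
+ * read from NFQA_PACKET_HDR (queue number 0 is an example value): it sends
+ * NFQNL_MSG_VERDICT with an NFQA_VERDICT_HDR attribute, which
+ * find_dequeue_entry() above matches by id before reinjection.
+ *
+ *	char buf[MNL_SOCKET_BUFFER_SIZE];
+ *	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ *	struct nfgenmsg *nfg;
+ *	struct nfqnl_msg_verdict_hdr vh = {
+ *		.verdict = htonl(NF_ACCEPT),
+ *		.id	 = htonl(packet_id),
+ *	};
+ *
+ *	nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+ *	nlh->nlmsg_flags = NLM_F_REQUEST;
+ *	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ *	nfg->nfgen_family = AF_UNSPEC;
+ *	nfg->version = NFNETLINK_V0;
+ *	nfg->res_id = htons(0);			(the queue number)
+ *	mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+ *	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+ */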
+
+static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const cda[])
+{
+ return -ENOTSUPP;
+}
+
+static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
+ [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
+ [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
+ [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
+ [NFQA_CFG_MASK] = { .type = NLA_U32 },
+ [NFQA_CFG_FLAGS] = { .type = NLA_U32 },
+};
+
+static const struct nf_queue_handler nfqh = {
+ .outfn = nfqnl_enqueue_packet,
+ .nf_hook_drop = nfqnl_nf_hook_drop,
+};
+
+static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ u_int16_t queue_num = ntohs(info->nfmsg->res_id);
+ struct nfqnl_msg_config_cmd *cmd = NULL;
+ struct nfqnl_instance *queue;
+ __u32 flags = 0, mask = 0;
+ int ret = 0;
+
+ if (nfqa[NFQA_CFG_CMD]) {
+ cmd = nla_data(nfqa[NFQA_CFG_CMD]);
+
+ /* Obsolete commands without queue context */
+ switch (cmd->command) {
+ case NFQNL_CFG_CMD_PF_BIND: return 0;
+ case NFQNL_CFG_CMD_PF_UNBIND: return 0;
+ }
+ }
+
+	/* Check whether we support these flags in the first place; the
+	 * dependencies must be present as well so that atomicity is not
+	 * broken.
+	 */
+ if (nfqa[NFQA_CFG_FLAGS]) {
+ if (!nfqa[NFQA_CFG_MASK]) {
+ /* A mask is needed to specify which flags are being
+ * changed.
+ */
+ return -EINVAL;
+ }
+
+ flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
+ mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
+
+ if (flags >= NFQA_CFG_F_MAX)
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (flags & mask & NFQA_CFG_F_SECCTX)
+ return -EOPNOTSUPP;
+#endif
+ if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_QUEUE);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_QUEUE);
+ if (rcu_access_pointer(nfnl_ct_hook))
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+ }
+
+ rcu_read_lock();
+ queue = instance_lookup(q, queue_num);
+ if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
+ ret = -EPERM;
+ goto err_out_unlock;
+ }
+
+ if (cmd != NULL) {
+ switch (cmd->command) {
+ case NFQNL_CFG_CMD_BIND:
+ if (queue) {
+ ret = -EBUSY;
+ goto err_out_unlock;
+ }
+ queue = instance_create(q, queue_num,
+ NETLINK_CB(skb).portid);
+ if (IS_ERR(queue)) {
+ ret = PTR_ERR(queue);
+ goto err_out_unlock;
+ }
+ break;
+ case NFQNL_CFG_CMD_UNBIND:
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+ instance_destroy(q, queue);
+ goto err_out_unlock;
+ case NFQNL_CFG_CMD_PF_BIND:
+ case NFQNL_CFG_CMD_PF_UNBIND:
+ break;
+ default:
+ ret = -ENOTSUPP;
+ goto err_out_unlock;
+ }
+ }
+
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+
+ if (nfqa[NFQA_CFG_PARAMS]) {
+ struct nfqnl_msg_config_params *params =
+ nla_data(nfqa[NFQA_CFG_PARAMS]);
+
+ nfqnl_set_mode(queue, params->copy_mode,
+ ntohl(params->copy_range));
+ }
+
+ if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
+ __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
+
+ spin_lock_bh(&queue->lock);
+ queue->queue_maxlen = ntohl(*queue_maxlen);
+ spin_unlock_bh(&queue->lock);
+ }
+
+ if (nfqa[NFQA_CFG_FLAGS]) {
+ spin_lock_bh(&queue->lock);
+ queue->flags &= ~mask;
+ queue->flags |= flags & mask;
+ spin_unlock_bh(&queue->lock);
+ }
+
+err_out_unlock:
+ rcu_read_unlock();
+ return ret;
+}
+
+static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
+ [NFQNL_MSG_PACKET] = {
+ .call = nfqnl_recv_unsupp,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ },
+ [NFQNL_MSG_VERDICT] = {
+ .call = nfqnl_recv_verdict,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ .policy = nfqa_verdict_policy
+ },
+ [NFQNL_MSG_CONFIG] = {
+ .call = nfqnl_recv_config,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFQA_CFG_MAX,
+ .policy = nfqa_cfg_policy
+ },
+ [NFQNL_MSG_VERDICT_BATCH] = {
+ .call = nfqnl_recv_verdict_batch,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ .policy = nfqa_verdict_batch_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfqnl_subsys = {
+ .name = "nf_queue",
+ .subsys_id = NFNL_SUBSYS_QUEUE,
+ .cb_count = NFQNL_MSG_MAX,
+ .cb = nfqnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+struct iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_node *get_first(struct seq_file *seq)
+{
+ struct iter_state *st = seq->private;
+ struct net *net;
+ struct nfnl_queue_net *q;
+
+ if (!st)
+ return NULL;
+
+ net = seq_file_net(seq);
+ q = nfnl_queue_pernet(net);
+ for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
+ if (!hlist_empty(&q->instance_table[st->bucket]))
+ return q->instance_table[st->bucket].first;
+ }
+ return NULL;
+}
+
+static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+{
+ struct iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
+
+ h = h->next;
+ while (!h) {
+ struct nfnl_queue_net *q;
+
+ if (++st->bucket >= INSTANCE_BUCKETS)
+ return NULL;
+
+ q = nfnl_queue_pernet(net);
+ h = q->instance_table[st->bucket].first;
+ }
+ return h;
+}
+
+static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *head = get_first(seq);
+
+ if (head)
+ while (pos && (head = get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
+{
+ spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
+ return get_idx(s, *pos);
+}
+
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return get_next(s, v);
+}
+
+static void seq_stop(struct seq_file *s, void *v)
+ __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
+{
+ spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
+}
+
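+/* One line per queue instance: queue number, peer portid, queued packet
+ * count, copy mode/range, drop counters, id sequence and a constant
+ * trailing field.
+ */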
+static int seq_show(struct seq_file *s, void *v)
+{
+ const struct nfqnl_instance *inst = v;
+
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
+ inst->queue_num,
+ inst->peer_portid, inst->queue_total,
+ inst->copy_mode, inst->copy_range,
+ inst->queue_dropped, inst->queue_user_dropped,
+ inst->id_sequence, 1);
+ return 0;
+}
+
+static const struct seq_operations nfqnl_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+#endif /* CONFIG_PROC_FS */
+
+static int __net_init nfnl_queue_net_init(struct net *net)
+{
+ unsigned int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ INIT_HLIST_HEAD(&q->instance_table[i]);
+
+ spin_lock_init(&q->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
+ &nfqnl_seq_ops, sizeof(struct iter_state)))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_queue_net_exit(struct net *net)
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int i;
+
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
+#endif
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
+}
+
+static struct pernet_operations nfnl_queue_net_ops = {
+ .init = nfnl_queue_net_init,
+ .exit = nfnl_queue_net_exit,
+ .id = &nfnl_queue_net_id,
+ .size = sizeof(struct nfnl_queue_net),
+};
+
+static int __init nfnetlink_queue_init(void)
+{
+ int status;
+
+ status = register_pernet_subsys(&nfnl_queue_net_ops);
+ if (status < 0) {
+ pr_err("failed to register pernet ops\n");
+ goto out;
+ }
+
+ netlink_register_notifier(&nfqnl_rtnl_notifier);
+ status = nfnetlink_subsys_register(&nfqnl_subsys);
+ if (status < 0) {
+ pr_err("failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+ status = register_netdevice_notifier(&nfqnl_dev_notifier);
+ if (status < 0) {
+ pr_err("failed to register netdevice notifier\n");
+ goto cleanup_netlink_subsys;
+ }
+
+ nf_register_queue_handler(&nfqh);
+
+ return status;
+
+cleanup_netlink_subsys:
+ nfnetlink_subsys_unregister(&nfqnl_subsys);
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
+out:
+ return status;
+}
+
+static void __exit nfnetlink_queue_fini(void)
+{
+ nf_unregister_queue_handler();
+ unregister_netdevice_notifier(&nfqnl_dev_notifier);
+ nfnetlink_subsys_unregister(&nfqnl_subsys);
+ netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
+
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
+}
+
+MODULE_DESCRIPTION("netfilter packet queue handler");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
+
+module_init(nfnetlink_queue_init);
+module_exit(nfnetlink_queue_fini);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 000000000..b84312df9
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+
+struct nft_bitwise {
+ u8 sreg;
+ u8 dreg;
+ enum nft_bitwise_ops op:8;
+ u8 len;
+ struct nft_data mask;
+ struct nft_data xor;
+ struct nft_data data;
+};
+
+static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
+ dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
+}
+
+static void nft_bitwise_eval_lshift(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
+{
+ u32 shift = priv->data.data[0];
+ unsigned int i;
+ u32 carry = 0;
+
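+	/* Register words are stored most significant first, so walk from
+	 * the last (least significant) word upwards, carrying the bits
+	 * shifted out of each word into the next more significant one.
+	 */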
+ for (i = DIV_ROUND_UP(priv->len, sizeof(u32)); i > 0; i--) {
+ dst[i - 1] = (src[i - 1] << shift) | carry;
+ carry = src[i - 1] >> (BITS_PER_TYPE(u32) - shift);
+ }
+}
+
+static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
+{
+ u32 shift = priv->data.data[0];
+ unsigned int i;
+ u32 carry = 0;
+
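+	/* Mirror of the left shift: walk from word 0 (most significant)
+	 * downwards, carrying bits into the next less significant word.
+	 */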
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) {
+ dst[i] = carry | (src[i] >> shift);
+ carry = src[i] << (BITS_PER_TYPE(u32) - shift);
+ }
+}
+
+void nft_bitwise_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ const u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
+
+ switch (priv->op) {
+ case NFT_BITWISE_BOOL:
+ nft_bitwise_eval_bool(dst, src, priv);
+ break;
+ case NFT_BITWISE_LSHIFT:
+ nft_bitwise_eval_lshift(dst, src, priv);
+ break;
+ case NFT_BITWISE_RSHIFT:
+ nft_bitwise_eval_rshift(dst, src, priv);
+ break;
+ }
+}
+
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+ [NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_DREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_LEN] = { .type = NLA_U32 },
+ [NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
+ [NFTA_BITWISE_XOR] = { .type = NLA_NESTED },
+ [NFTA_BITWISE_OP] = { .type = NLA_U32 },
+ [NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+{
+ struct nft_data_desc mask = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->mask),
+ .len = priv->len,
+ };
+ struct nft_data_desc xor = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->xor),
+ .len = priv->len,
+ };
+ int err;
+
+ if (tb[NFTA_BITWISE_DATA])
+ return -EINVAL;
+
+ if (!tb[NFTA_BITWISE_MASK] ||
+ !tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]);
+ if (err < 0)
+ goto err_xor_err;
+
+ return 0;
+
+err_xor_err:
+ nft_data_release(&priv->mask, mask.type);
+
+ return err;
+}
+
+static int nft_bitwise_init_shift(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+{
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = sizeof(u32),
+ };
+ int err;
+
+ if (tb[NFTA_BITWISE_MASK] ||
+ tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ if (!tb[NFTA_BITWISE_DATA])
+ return -EINVAL;
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]);
+ if (err < 0)
+ return err;
+
+ if (priv->data.data[0] >= BITS_PER_TYPE(u32)) {
+ nft_data_release(&priv->data, desc.type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_bitwise *priv = nft_expr_priv(expr);
+ u32 len;
+ int err;
+
+ err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ priv->len = len;
+
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ priv->len);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_BITWISE_OP]) {
+ priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP]));
+ switch (priv->op) {
+ case NFT_BITWISE_BOOL:
+ case NFT_BITWISE_LSHIFT:
+ case NFT_BITWISE_RSHIFT:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ } else {
+ priv->op = NFT_BITWISE_BOOL;
+ }
+
+	switch (priv->op) {
+ case NFT_BITWISE_BOOL:
+ err = nft_bitwise_init_bool(priv, tb);
+ break;
+ case NFT_BITWISE_LSHIFT:
+ case NFT_BITWISE_RSHIFT:
+ err = nft_bitwise_init_shift(priv, tb);
+ break;
+ }
+
+ return err;
+}
+
+static int nft_bitwise_dump_bool(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
+{
+ if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
+ NFT_DATA_VALUE, priv->len) < 0)
+ return -1;
+
+ if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
+ NFT_DATA_VALUE, priv->len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_bitwise_dump_shift(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
+{
+ if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+ return 0;
+}
+
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ int err = 0;
+
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg))
+ return -1;
+ if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg))
+ return -1;
+ if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
+ return -1;
+ if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(priv->op)))
+ return -1;
+
+ switch (priv->op) {
+ case NFT_BITWISE_BOOL:
+ err = nft_bitwise_dump_bool(skb, priv);
+ break;
+ case NFT_BITWISE_LSHIFT:
+ case NFT_BITWISE_RSHIFT:
+ err = nft_bitwise_dump_shift(skb, priv);
+ break;
+ }
+
+ return err;
+}
+
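+/* All-zero constant: offload can only express masking, so the XOR part
+ * of the expression must be a no-op.
+ */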
+static struct nft_data zero;
+
+static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ if (priv->op != NFT_BITWISE_BOOL)
+ return -EOPNOTSUPP;
+
+ if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
+ priv->sreg != priv->dreg || priv->len != reg->len)
+ return -EOPNOTSUPP;
+
+ memcpy(&reg->mask, &priv->mask, sizeof(priv->mask));
+
+ return 0;
+}
+
+static bool nft_bitwise_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ const struct nft_bitwise *bitwise;
+ unsigned int regcount;
+ u8 dreg;
+ int i;
+
+ if (!track->regs[priv->sreg].selector)
+ return false;
+
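+	/* If the destination register already holds the result of an
+	 * identical bitwise operation on the same source, this expression
+	 * is redundant and the caller may elide it.
+	 */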
+ bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+ track->regs[priv->sreg].num_reg == 0 &&
+ track->regs[priv->dreg].bitwise &&
+ track->regs[priv->dreg].bitwise->ops == expr->ops &&
+ priv->sreg == bitwise->sreg &&
+ priv->dreg == bitwise->dreg &&
+ priv->op == bitwise->op &&
+ priv->len == bitwise->len &&
+ !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) &&
+ !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) &&
+ !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) {
+ track->cur = expr;
+ return true;
+ }
+
+ if (track->regs[priv->sreg].bitwise ||
+ track->regs[priv->sreg].num_reg != 0) {
+ nft_reg_track_cancel(track, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (priv->sreg != priv->dreg) {
+ nft_reg_track_update(track, track->regs[priv->sreg].selector,
+ priv->dreg, priv->len);
+ }
+
+ dreg = priv->dreg;
+ regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ track->regs[dreg].bitwise = expr;
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_bitwise_ops = {
+ .type = &nft_bitwise_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+ .eval = nft_bitwise_eval,
+ .init = nft_bitwise_init,
+ .dump = nft_bitwise_dump,
+ .reduce = nft_bitwise_reduce,
+ .offload = nft_bitwise_offload,
+};
+
+static int
+nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out)
+{
+ struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ .len = sizeof(u32),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &data, &desc, tb);
+ if (err < 0)
+ return err;
+
+ *out = data.data[0];
+
+ return 0;
+}
+
+static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ sizeof(u32));
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_BITWISE_DATA])
+ return -EINVAL;
+
+ if (!tb[NFTA_BITWISE_MASK] ||
+ !tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_MASK], &priv->mask);
+ if (err < 0)
+ return err;
+
+ err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_XOR], &priv->xor);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int
+nft_bitwise_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data data;
+
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg))
+ return -1;
+ if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg))
+ return -1;
+ if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32))))
+ return -1;
+ if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL)))
+ return -1;
+
+ data.data[0] = priv->mask;
+ if (nft_data_dump(skb, NFTA_BITWISE_MASK, &data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+
+ data.data[0] = priv->xor;
+ if (nft_data_dump(skb, NFTA_BITWISE_XOR, &data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ if (priv->xor || priv->sreg != priv->dreg || reg->len != sizeof(u32))
+ return -EOPNOTSUPP;
+
+ reg->mask.data[0] = priv->mask;
+ return 0;
+}
+
+static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ const struct nft_bitwise_fast_expr *bitwise;
+
+ if (!track->regs[priv->sreg].selector)
+ return false;
+
+ bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+ track->regs[priv->dreg].bitwise &&
+ track->regs[priv->dreg].bitwise->ops == expr->ops &&
+ priv->sreg == bitwise->sreg &&
+ priv->dreg == bitwise->dreg &&
+ priv->mask == bitwise->mask &&
+ priv->xor == bitwise->xor) {
+ track->cur = expr;
+ return true;
+ }
+
+ if (track->regs[priv->sreg].bitwise) {
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
+ return false;
+ }
+
+ if (priv->sreg != priv->dreg) {
+ track->regs[priv->dreg].selector =
+ track->regs[priv->sreg].selector;
+ }
+ track->regs[priv->dreg].bitwise = expr;
+
+ return false;
+}
+
+const struct nft_expr_ops nft_bitwise_fast_ops = {
+ .type = &nft_bitwise_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_bitwise_fast_init,
+ .dump = nft_bitwise_fast_dump,
+ .reduce = nft_bitwise_fast_reduce,
+ .offload = nft_bitwise_fast_offload,
+};
+
+static const struct nft_expr_ops *
+nft_bitwise_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ int err;
+ u32 len;
+
+ if (!tb[NFTA_BITWISE_LEN] ||
+ !tb[NFTA_BITWISE_SREG] ||
+ !tb[NFTA_BITWISE_DREG])
+ return ERR_PTR(-EINVAL);
+
+ err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
+
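+	/* Only 32-bit boolean operations have a fast implementation;
+	 * everything else falls back to the generic one.
+	 */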
+ if (len != sizeof(u32))
+ return &nft_bitwise_ops;
+
+ if (tb[NFTA_BITWISE_OP] &&
+ ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL)
+ return &nft_bitwise_ops;
+
+ return &nft_bitwise_fast_ops;
+}
+
+struct nft_expr_type nft_bitwise_type __read_mostly = {
+ .name = "bitwise",
+ .select_ops = nft_bitwise_select_ops,
+ .policy = nft_bitwise_policy,
+ .maxattr = NFTA_BITWISE_MAX,
+ .owner = THIS_MODULE,
+};
+
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_expr *last = track->last;
+ const struct nft_expr *next;
+
+ if (expr == last)
+ return false;
+
+ next = nft_expr_next(expr);
+ if (next->ops == &nft_bitwise_ops)
+ return nft_bitwise_reduce(track, next);
+ else if (next->ops == &nft_bitwise_fast_ops)
+ return nft_bitwise_fast_reduce(track, next);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(nft_expr_reduce_bitwise);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 000000000..605178133
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_byteorder {
+ u8 sreg;
+ u8 dreg;
+ enum nft_byteorder_ops op:8;
+ u8 len;
+ u8 size;
+};
+
+void nft_byteorder_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
+ u16 *s16, *d16;
+ unsigned int i;
+
+ s16 = (void *)src;
+ d16 = (void *)dst;
+
+ switch (priv->size) {
+ case 8: {
+ u64 *dst64 = (void *)dst;
+ u64 src64;
+
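+		/* 64-bit values span two 32-bit registers; go through the
+		 * unaligned load/store helpers instead of plain dereferences.
+		 */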
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = nft_reg_load64(&src[i]);
+ nft_reg_store64(&dst64[i],
+ be64_to_cpu((__force __be64)src64));
+ }
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = (__force __u64)
+ cpu_to_be64(nft_reg_load64(&src[i]));
+ nft_reg_store64(&dst64[i], src64);
+ }
+ break;
+ }
+ break;
+ }
+ case 4:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 4; i++)
+ dst[i] = ntohl((__force __be32)src[i]);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 4; i++)
+ dst[i] = (__force __u32)htonl(src[i]);
+ break;
+ }
+ break;
+ case 2:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 2; i++)
+ d16[i] = ntohs((__force __be16)s16[i]);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 2; i++)
+ d16[i] = (__force __u16)htons(s16[i]);
+ break;
+ }
+ break;
+ }
+}
+
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+ [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_OP] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 },
+};
+
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+ u32 size, len;
+ int err;
+
+ if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+ tb[NFTA_BYTEORDER_DREG] == NULL ||
+ tb[NFTA_BYTEORDER_LEN] == NULL ||
+ tb[NFTA_BYTEORDER_SIZE] == NULL ||
+ tb[NFTA_BYTEORDER_OP] == NULL)
+ return -EINVAL;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ case NFT_BYTEORDER_HTON:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size);
+ if (err < 0)
+ return err;
+
+ priv->size = size;
+
+ switch (priv->size) {
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ priv->len = len;
+
+ err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ priv->len);
+ if (err < 0)
+ return err;
+
+ return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
+}
+
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_byteorder_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, priv->len);
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_byteorder_ops = {
+ .type = &nft_byteorder_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+ .eval = nft_byteorder_eval,
+ .init = nft_byteorder_init,
+ .dump = nft_byteorder_dump,
+ .reduce = nft_byteorder_reduce,
+};
+
+struct nft_expr_type nft_byteorder_type __read_mostly = {
+ .name = "byteorder",
+ .ops = &nft_byteorder_ops,
+ .policy = nft_byteorder_policy,
+ .maxattr = NFTA_BYTEORDER_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
new file mode 100644
index 000000000..274b6f7e6
--- /dev/null
+++ b/net/netfilter/nft_chain_filter.c
@@ -0,0 +1,456 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static unsigned int nft_do_chain_ipv4(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+
+static void nft_chain_filter_ipv4_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_ipv4);
+}
+
+static void nft_chain_filter_ipv4_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_ipv4);
+}
+
+#else
+static inline void nft_chain_filter_ipv4_init(void) {}
+static inline void nft_chain_filter_ipv4_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV4 */
+
+#ifdef CONFIG_NF_TABLES_ARP
+static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
+};
+
+static void nft_chain_filter_arp_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_arp);
+}
+
+static void nft_chain_filter_arp_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_arp);
+}
+#else
+static inline void nft_chain_filter_arp_init(void) {}
+static inline void nft_chain_filter_arp_fini(void) {}
+#endif /* CONFIG_NF_TABLES_ARP */
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static unsigned int nft_do_chain_ipv6(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt);
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv6 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV6,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+
+static void nft_chain_filter_ipv6_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_ipv6);
+}
+
+static void nft_chain_filter_ipv6_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_ipv6);
+}
+#else
+static inline void nft_chain_filter_ipv6_init(void) {}
+static inline void nft_chain_filter_ipv6_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+#ifdef CONFIG_NF_TABLES_INET
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt);
+ break;
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_hook_state ingress_state = *state;
+ struct nft_pktinfo pkt;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ /* Original hook is NFPROTO_NETDEV and NF_NETDEV_INGRESS. */
+ ingress_state.pf = NFPROTO_IPV4;
+ ingress_state.hook = NF_INET_INGRESS;
+ nft_set_pktinfo(&pkt, skb, &ingress_state);
+
+ if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0)
+ return NF_DROP;
+ break;
+ case htons(ETH_P_IPV6):
+ ingress_state.pf = NFPROTO_IPV6;
+ ingress_state.hook = NF_INET_INGRESS;
+ nft_set_pktinfo(&pkt, skb, &ingress_state);
+
+ if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0)
+ return NF_DROP;
+ break;
+ default:
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_inet = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_INET,
+ .hook_mask = (1 << NF_INET_INGRESS) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_INGRESS] = nft_do_chain_inet_ingress,
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
+};
+
+static void nft_chain_filter_inet_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_inet);
+}
+
+static void nft_chain_filter_inet_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_inet);
+}
+#else
+static inline void nft_chain_filter_inet_init(void) {}
+static inline void nft_chain_filter_inet_fini(void) {}
+#endif /* CONFIG_NF_TABLES_INET */
+
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
+static unsigned int
+nft_do_chain_bridge(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ nft_set_pktinfo_ipv4_validate(&pkt);
+ break;
+ case htons(ETH_P_IPV6):
+ nft_set_pktinfo_ipv6_validate(&pkt);
+ break;
+ default:
+ nft_set_pktinfo_unspec(&pkt);
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_bridge = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_BRIDGE,
+ .hook_mask = (1 << NF_BR_PRE_ROUTING) |
+ (1 << NF_BR_LOCAL_IN) |
+ (1 << NF_BR_FORWARD) |
+ (1 << NF_BR_LOCAL_OUT) |
+ (1 << NF_BR_POST_ROUTING),
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+ },
+};
+
+static void nft_chain_filter_bridge_init(void)
+{
+ nft_register_chain_type(&nft_chain_filter_bridge);
+}
+
+static void nft_chain_filter_bridge_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_bridge);
+}
+#else
+static inline void nft_chain_filter_bridge_init(void) {}
+static inline void nft_chain_filter_bridge_fini(void) {}
+#endif /* CONFIG_NF_TABLES_BRIDGE */
+
+#ifdef CONFIG_NF_TABLES_NETDEV
+static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ nft_set_pktinfo_ipv4_validate(&pkt);
+ break;
+ case htons(ETH_P_IPV6):
+ nft_set_pktinfo_ipv6_validate(&pkt);
+ break;
+ default:
+ nft_set_pktinfo_unspec(&pkt);
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_netdev = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_NETDEV,
+ .hook_mask = (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS),
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ [NF_NETDEV_EGRESS] = nft_do_chain_netdev,
+ },
+};
+
+static void nft_netdev_event(unsigned long event, struct net_device *dev,
+ struct nft_ctx *ctx)
+{
+ struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+ struct nft_hook *hook, *found = NULL;
+ int n = 0;
+
+ if (event != NETDEV_UNREGISTER)
+ return;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev == dev)
+ found = hook;
+
+ n++;
+ }
+ if (!found)
+ return;
+
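+	/* Other devices are still bound to this base chain: remove only
+	 * the hook of the device that is going away.
+	 */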
+ if (n > 1) {
+ nf_unregister_net_hook(ctx->net, &found->ops);
+ list_del_rcu(&found->list);
+ kfree_rcu(found, rcu);
+ return;
+ }
+
+ /* UNREGISTER events are also happening on netns exit.
+ *
+ * Although nf_tables core releases all tables/chains, only this event
+ * handler provides guarantee that hook->ops.dev is still accessible,
+ * so we cannot skip exiting net namespaces.
+ */
+ __nft_release_basechain(ctx);
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_base_chain *basechain;
+ struct nftables_pernet *nft_net;
+ struct nft_chain *chain, *nr;
+ struct nft_table *table;
+ struct nft_ctx ctx = {
+ .net = dev_net(dev),
+ };
+
+ if (event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
+
+ nft_net = nft_pernet(ctx.net);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (table->family != NFPROTO_NETDEV &&
+ table->family != NFPROTO_INET)
+ continue;
+
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ basechain = nft_base_chain(chain);
+ if (table->family == NFPROTO_INET &&
+ basechain->ops.hooknum != NF_INET_INGRESS)
+ continue;
+
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
+ }
+ }
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_netdev_notifier = {
+ .notifier_call = nf_tables_netdev_event,
+};
+
+static int nft_chain_filter_netdev_init(void)
+{
+ int err;
+
+ nft_register_chain_type(&nft_chain_filter_netdev);
+
+ err = register_netdevice_notifier(&nf_tables_netdev_notifier);
+ if (err)
+ goto err_register_netdevice_notifier;
+
+ return 0;
+
+err_register_netdevice_notifier:
+ nft_unregister_chain_type(&nft_chain_filter_netdev);
+
+ return err;
+}
+
+static void nft_chain_filter_netdev_fini(void)
+{
+ nft_unregister_chain_type(&nft_chain_filter_netdev);
+ unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+}
+#else
+static inline int nft_chain_filter_netdev_init(void) { return 0; }
+static inline void nft_chain_filter_netdev_fini(void) {}
+#endif /* CONFIG_NF_TABLES_NETDEV */
+
+int __init nft_chain_filter_init(void)
+{
+ int err;
+
+ err = nft_chain_filter_netdev_init();
+ if (err < 0)
+ return err;
+
+ nft_chain_filter_ipv4_init();
+ nft_chain_filter_ipv6_init();
+ nft_chain_filter_arp_init();
+ nft_chain_filter_inet_init();
+ nft_chain_filter_bridge_init();
+
+ return 0;
+}
+
+void nft_chain_filter_fini(void)
+{
+ nft_chain_filter_bridge_fini();
+ nft_chain_filter_inet_fini();
+ nft_chain_filter_arp_fini();
+ nft_chain_filter_ipv6_fini();
+ nft_chain_filter_ipv4_fini();
+ nft_chain_filter_netdev_fini();
+}
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
new file mode 100644
index 000000000..98e494610
--- /dev/null
+++ b/net/netfilter/nft_chain_nat.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+#ifdef CONFIG_NF_TABLES_IPV4
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt);
+ break;
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
+ [NF_INET_POST_ROUTING] = nft_nat_do_chain,
+ [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
+ [NF_INET_LOCAL_IN] = nft_nat_do_chain,
+ },
+ .ops_register = nf_nat_ipv4_register_fn,
+ .ops_unregister = nf_nat_ipv4_unregister_fn,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
+ [NF_INET_POST_ROUTING] = nft_nat_do_chain,
+ [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
+ [NF_INET_LOCAL_IN] = nft_nat_do_chain,
+ },
+ .ops_register = nf_nat_ipv6_register_fn,
+ .ops_unregister = nf_nat_ipv6_unregister_fn,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops)
+{
+ return nf_nat_inet_register_fn(net, ops);
+}
+
+static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_inet_unregister_fn(net, ops);
+}
+
+static const struct nft_chain_type nft_chain_nat_inet = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_INET,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
+ [NF_INET_LOCAL_IN] = nft_nat_do_chain,
+ [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
+ [NF_INET_POST_ROUTING] = nft_nat_do_chain,
+ },
+ .ops_register = nft_nat_inet_reg,
+ .ops_unregister = nft_nat_inet_unreg,
+};
+#endif
+
+static int __init nft_chain_nat_init(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_register_chain_type(&nft_chain_nat_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_register_chain_type(&nft_chain_nat_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_register_chain_type(&nft_chain_nat_inet);
+#endif
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_unregister_chain_type(&nft_chain_nat_inet);
+#endif
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+#ifdef CONFIG_NF_TABLES_IPV4
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */
+#endif
diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
new file mode 100644
index 000000000..925db0dce
--- /dev/null
+++ b/net/netfilter/nft_chain_route.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static unsigned int nf_route_table_hook4(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct iphdr *iph;
+ struct nft_pktinfo pkt;
+ __be32 saddr, daddr;
+ unsigned int ret;
+ u32 mark;
+ int err;
+ u8 tos;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = nft_do_chain(&pkt, priv);
+ if (ret == NF_ACCEPT) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos) {
+ err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+ return ret;
+}
+
+static const struct nft_chain_type nft_chain_route_ipv4 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV4,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook4,
+ },
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static unsigned int nf_route_table_hook6(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct in6_addr saddr, daddr;
+ struct nft_pktinfo pkt;
+ u32 mark, flowlabel;
+ unsigned int ret;
+ u8 hop_limit;
+ int err;
+
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt);
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either) */
+ flowlabel = *((u32 *)ipv6_hdr(skb));
+
+ ret = nft_do_chain(&pkt, priv);
+ if (ret == NF_ACCEPT &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u32 *)ipv6_hdr(skb)))) {
+ err = nf_ip6_route_me_harder(state->net, state->sk, skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+
+ return ret;
+}
+
+static const struct nft_chain_type nft_chain_route_ipv6 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV6,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook6,
+ },
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static unsigned int nf_route_table_inet(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ return nf_route_table_hook4(priv, skb, state);
+ case NFPROTO_IPV6:
+ return nf_route_table_hook6(priv, skb, state);
+ default:
+ nft_set_pktinfo(&pkt, skb, state);
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_route_inet = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_INET,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_inet,
+ },
+};
+#endif
+
+void __init nft_chain_route_init(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_register_chain_type(&nft_chain_route_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_register_chain_type(&nft_chain_route_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_register_chain_type(&nft_chain_route_inet);
+#endif
+}
+
+void __exit nft_chain_route_fini(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_chain_type(&nft_chain_route_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_unregister_chain_type(&nft_chain_route_inet);
+#endif
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 000000000..963cf8317
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_cmp_expr {
+ struct nft_data data;
+ u8 sreg;
+ u8 len;
+ enum nft_cmp_ops op:8;
+};
+
+void nft_cmp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ int d;
+
+ d = memcmp(&regs->data[priv->sreg], &priv->data, priv->len);
+ switch (priv->op) {
+ case NFT_CMP_EQ:
+ if (d != 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_NEQ:
+ if (d == 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_LT:
+ if (d == 0)
+ goto mismatch;
+ fallthrough;
+ case NFT_CMP_LTE:
+ if (d > 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_GT:
+ if (d == 0)
+ goto mismatch;
+ fallthrough;
+ case NFT_CMP_GTE:
+ if (d < 0)
+ goto mismatch;
+ break;
+ }
+ return;
+
+mismatch:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+ [NFTA_CMP_SREG] = { .type = NLA_U32 },
+ [NFTA_CMP_OP] = { .type = NLA_U32 },
+ [NFTA_CMP_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+ priv->len = desc.len;
+ return 0;
+}
+
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+union nft_cmp_offload_data {
+ u16 val16;
+ u32 val32;
+ u64 val64;
+};
+
+static void nft_payload_n2h(union nft_cmp_offload_data *data,
+ const u8 *val, u32 len)
+{
+ switch (len) {
+ case 2:
+ data->val16 = ntohs(*((__be16 *)val));
+ break;
+ case 4:
+ data->val32 = ntohl(*((__be32 *)val));
+ break;
+ case 8:
+ data->val64 = be64_to_cpu(*((__be64 *)val));
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_cmp_expr *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->sreg];
+ union nft_cmp_offload_data _data, _datamask;
+ u8 *mask = (u8 *)&flow->match.mask;
+ u8 *key = (u8 *)&flow->match.key;
+ u8 *data, *datamask;
+
+ if (priv->op != NFT_CMP_EQ || priv->len > reg->len)
+ return -EOPNOTSUPP;
+
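+	/* Some dissector fields are matched in host byte order; convert
+	 * the network byte order comparison data and mask before copying
+	 * them into the flow key.
+	 */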
+ if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) {
+ nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len);
+ nft_payload_n2h(&_datamask, (u8 *)&reg->mask, reg->len);
+ data = (u8 *)&_data;
+ datamask = (u8 *)&_datamask;
+ } else {
+ data = (u8 *)&priv->data;
+ datamask = (u8 *)&reg->mask;
+ }
+
+ memcpy(key + reg->offset, data, reg->len);
+ memcpy(mask + reg->offset, datamask, reg->len);
+
+ flow->match.dissector.used_keys |= BIT(reg->key);
+ flow->match.dissector.offset[reg->key] = reg->base_offset;
+
+ if (reg->key == FLOW_DISSECTOR_KEY_META &&
+ reg->offset == offsetof(struct nft_flow_key, meta.ingress_iftype) &&
+ nft_reg_load16(priv->data.data) != ARPHRD_ETHER)
+ return -EOPNOTSUPP;
+
+ nft_offload_update_dependency(ctx, &priv->data, reg->len);
+
+ return 0;
+}
+
+static int nft_cmp_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+ return __nft_cmp_offload(ctx, flow, priv);
+}
+
+static const struct nft_expr_ops nft_cmp_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+ .eval = nft_cmp_eval,
+ .init = nft_cmp_init,
+ .dump = nft_cmp_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_cmp_offload,
+};
+
+/* Calculate the mask for the nft_cmp_fast expression. On big endian the
+ * mask needs to include the *upper* bytes when interpreting that data as
+ * something smaller than the full u32, therefore a cpu_to_le32 is done.
+ */
+static u32 nft_cmp_fast_mask(unsigned int len)
+{
+ __le32 mask = cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr,
+ data) * BITS_PER_BYTE - len));
+
+ return (__force u32)mask;
+}
+
+static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ desc.len *= BITS_PER_BYTE;
+
+ priv->mask = nft_cmp_fast_mask(desc.len);
+ priv->data = data.data[0] & priv->mask;
+ priv->len = desc.len;
+ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
+ return 0;
+}
+
+static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_cmp_expr cmp = {
+ .data = {
+ .data = {
+ [0] = priv->data,
+ },
+ },
+ .sreg = priv->sreg,
+ .len = priv->len / BITS_PER_BYTE,
+ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
+ };
+
+ return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
+static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
+ struct nft_data data;
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
+ goto nla_put_failure;
+
+ data.data[0] = priv->data;
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &data,
+ NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+const struct nft_expr_ops nft_cmp_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp_fast_init,
+ .dump = nft_cmp_fast_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_cmp_fast_offload,
+};
+
+static u32 nft_cmp_mask(u32 bitlen)
+{
+ return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
+}
+
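+/* Fill whole 32-bit words of the mask with ones, build a partial mask
+ * for a trailing sub-word (see nft_cmp_mask) and zero the remainder.
+ */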
+static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
+{
+ int len = bitlen / BITS_PER_BYTE;
+ int i, words = len / sizeof(u32);
+
+ for (i = 0; i < words; i++) {
+ data->data[i] = 0xffffffff;
+ bitlen -= sizeof(u32) * BITS_PER_BYTE;
+ }
+
+ if (len % sizeof(u32))
+ data->data[i++] = nft_cmp_mask(bitlen);
+
+ for (; i < 4; i++)
+ data->data[i] = 0;
+}
+
+static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
+ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
+ priv->len = desc.len;
+
+ return 0;
+}
+
+static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_cmp_expr cmp = {
+ .data = priv->data,
+ .sreg = priv->sreg,
+ .len = priv->len,
+ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
+ };
+
+ return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
+static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+const struct nft_expr_ops nft_cmp16_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp16_fast_init,
+ .dump = nft_cmp16_fast_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_cmp16_fast_offload,
+};
+
+static const struct nft_expr_ops *
+nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+ struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
+ enum nft_cmp_ops op;
+ u8 sreg;
+ int err;
+
+ if (tb[NFTA_CMP_SREG] == NULL ||
+ tb[NFTA_CMP_OP] == NULL ||
+ tb[NFTA_CMP_DATA] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+ switch (op) {
+ case NFT_CMP_EQ:
+ case NFT_CMP_NEQ:
+ case NFT_CMP_LT:
+ case NFT_CMP_LTE:
+ case NFT_CMP_GT:
+ case NFT_CMP_GTE:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
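+	/* Fast paths exist for (in)equality only: values up to 32 bits
+	 * always qualify, larger values up to 16 bytes qualify when the
+	 * source register placement permits the wider load.
+	 */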
+ if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
+ if (desc.len <= sizeof(u32))
+ return &nft_cmp_fast_ops;
+ else if (desc.len <= sizeof(data) &&
+ ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
+ (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
+ return &nft_cmp16_fast_ops;
+ }
+ return &nft_cmp_ops;
+}
+
+struct nft_expr_type nft_cmp_type __read_mostly = {
+ .name = "cmp",
+ .select_ops = nft_cmp_select_ops,
+ .policy = nft_cmp_policy,
+ .maxattr = NFTA_CMP_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
new file mode 100644
index 000000000..6952da7df
--- /dev/null
+++ b/net/netfilter/nft_compat.c
@@ -0,0 +1,958 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_tables_compat.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+
+/* Used for matches where *info is larger than X byte */
+#define NFT_MATCH_LARGE_THRESH 192
+
+struct nft_xt_match_priv {
+ void *info;
+};
+
+static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
+ const char *tablename)
+{
+ enum nft_chain_types type = NFT_CHAIN_T_DEFAULT;
+ const struct nft_chain *chain = ctx->chain;
+ const struct nft_base_chain *basechain;
+
+ if (!tablename ||
+ !nft_is_base_chain(chain))
+ return 0;
+
+ basechain = nft_base_chain(chain);
+ if (strcmp(tablename, "nat") == 0) {
+ if (ctx->family != NFPROTO_BRIDGE)
+ type = NFT_CHAIN_T_NAT;
+ if (basechain->type->type != type)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+union nft_entry {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+ struct ebt_entry ebt;
+ struct arpt_entry arp;
+};
+
+static inline void
+nft_compat_set_par(struct xt_action_param *par,
+ const struct nft_pktinfo *pkt,
+ const void *xt, const void *xt_info)
+{
+ par->state = pkt->state;
+ par->thoff = nft_thoff(pkt);
+ par->fragoff = pkt->fragoff;
+ par->target = xt;
+ par->targinfo = xt_info;
+ par->hotdrop = false;
+}
+
+static void nft_target_eval_xt(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
+ int ret;
+
+ nft_compat_set_par(&xt, pkt, target, info);
+
+ ret = target->target(skb, &xt);
+
+ if (xt.hotdrop)
+ ret = NF_DROP;
+
+ switch (ret) {
+ case XT_CONTINUE:
+ regs->verdict.code = NFT_CONTINUE;
+ break;
+ default:
+ regs->verdict.code = ret;
+ break;
+ }
+}
+
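+/* ebtables targets return EBT_* verdict codes, which differ from the NF_*
+ * values used by the other families; translate them to nf_tables verdicts.
+ */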
+static void nft_target_eval_bridge(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
+ int ret;
+
+ nft_compat_set_par(&xt, pkt, target, info);
+
+ ret = target->target(skb, &xt);
+
+ if (xt.hotdrop)
+ ret = NF_DROP;
+
+ switch (ret) {
+ case EBT_ACCEPT:
+ regs->verdict.code = NF_ACCEPT;
+ break;
+ case EBT_DROP:
+ regs->verdict.code = NF_DROP;
+ break;
+ case EBT_CONTINUE:
+ regs->verdict.code = NFT_CONTINUE;
+ break;
+ case EBT_RETURN:
+ regs->verdict.code = NFT_RETURN;
+ break;
+ default:
+ regs->verdict.code = ret;
+ break;
+ }
+}
+
+static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
+ [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_INFO] = { .type = NLA_BINARY },
+};
+
+static void
+nft_target_set_tgchk_param(struct xt_tgchk_param *par,
+ const struct nft_ctx *ctx,
+ struct xt_target *target, void *info,
+ union nft_entry *entry, u16 proto, bool inv)
+{
+ par->net = ctx->net;
+ par->table = ctx->table->name;
+ switch (ctx->family) {
+ case AF_INET:
+ entry->e4.ip.proto = proto;
+ entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+ break;
+ case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
+ entry->e6.ipv6.proto = proto;
+ entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+ break;
+ case NFPROTO_BRIDGE:
+ entry->ebt.ethproto = (__force __be16)proto;
+ entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+ break;
+ case NFPROTO_ARP:
+ break;
+ }
+ par->entryinfo = entry;
+ par->target = target;
+ par->targinfo = info;
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ } else {
+ par->hook_mask = 0;
+ }
+ par->family = ctx->family;
+ par->nft_compat = true;
+}
+
+static void target_compat_from_user(struct xt_target *t, void *in, void *out)
+{
+ int pad;
+
+ memcpy(out, in, t->targetsize);
+ pad = XT_ALIGN(t->targetsize) - t->targetsize;
+ if (pad > 0)
+ memset(out + t->targetsize, 0, pad);
+}
+
+static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
+ [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 },
+ [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
+{
+ struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 flags;
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_RULE_COMPAT_MAX, attr,
+ nft_rule_compat_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS])
+ return -EINVAL;
+
+ flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
+ if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ return -EINVAL;
+ if (flags & NFT_RULE_COMPAT_F_INV)
+ *inv = true;
+
+ *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ return 0;
+}
+
+static void nft_compat_wait_for_destructors(void)
+{
+ /* xtables matches or targets can have side effects, e.g.
+ * creation/destruction of /proc files.
+	 * The xt ->destroy functions are run asynchronously from a
+	 * work queue. If we have pending invocations we thus
+ * need to wait for those to finish.
+ */
+ nf_tables_trans_destroy_flush_work();
+}
+
+static int
+nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ void *info = nft_expr_priv(expr);
+ struct xt_target *target = expr->ops->data;
+ struct xt_tgchk_param par;
+ size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+ u16 proto = 0;
+ bool inv = false;
+ union nft_entry e = {};
+ int ret;
+
+ target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
+
+ if (ctx->nla[NFTA_RULE_COMPAT]) {
+ ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
+ if (ret < 0)
+ return ret;
+ }
+
+ nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
+ nft_compat_wait_for_destructors();
+
+ ret = xt_check_target(&par, size, proto, inv);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ const char *modname = NULL;
+
+ if (strcmp(target->name, "LOG") == 0)
+ modname = "nf_log_syslog";
+ else if (strcmp(target->name, "NFLOG") == 0)
+ modname = "nfnetlink_log";
+
+ if (modname &&
+ nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+ return -EAGAIN;
+ }
+
+ return ret;
+ }
+
+ /* The standard target cannot be used */
+ if (!target->target)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __nft_mt_tg_destroy(struct module *me, const struct nft_expr *expr)
+{
+ module_put(me);
+ kfree(expr->ops);
+}
+
+static void
+nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct module *me = target->me;
+ struct xt_tgdtor_param par;
+
+ par.net = ctx->net;
+ par.target = target;
+ par.targinfo = info;
+ par.family = ctx->family;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+
+ __nft_mt_tg_destroy(me, expr);
+}
+
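+/* Dump only the user-visible part of the extension data: when an extension
+ * declares a smaller ->usersize, the remainder is kernel-private state and
+ * is zero-filled instead of copied, so it is never exposed to userspace.
+ */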
+static int nft_extension_dump_info(struct sk_buff *skb, int attr,
+ const void *info,
+ unsigned int size, unsigned int user_size)
+{
+ unsigned int info_size, aligned_size = XT_ALIGN(size);
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, attr, aligned_size);
+ if (!nla)
+ return -1;
+
+ info_size = user_size ? : size;
+ memcpy(nla_data(nla), info, info_size);
+ memset(nla_data(nla) + info_size, 0, aligned_size - info_size);
+
+ return 0;
+}
+
+static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
+ nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
+ nft_extension_dump_info(skb, NFTA_TARGET_INFO, info,
+ target->targetsize, target->usersize))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_target_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct xt_target *target = expr->ops->data;
+ unsigned int hook_mask = 0;
+ int ret;
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (target->hooks && !(hook_mask & target->hooks))
+ return -EINVAL;
+
+ ret = nft_compat_chain_validate_dependency(ctx, target->table);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static void __nft_match_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ void *info)
+{
+ struct xt_match *match = expr->ops->data;
+ struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
+ bool ret;
+
+ nft_compat_set_par(&xt, pkt, match, info);
+
+ ret = match->match(skb, &xt);
+
+ if (xt.hotdrop) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+	if (ret)
+		regs->verdict.code = NFT_CONTINUE;
+	else
+		regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_match_large_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+ __nft_match_eval(expr, regs, pkt, priv->info);
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr));
+}
+
+static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
+ [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_INFO] = { .type = NLA_BINARY },
+};
+
+/* struct xt_mtchk_param and xt_tgchk_param look very similar */
+static void
+nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
+ struct xt_match *match, void *info,
+ union nft_entry *entry, u16 proto, bool inv)
+{
+ par->net = ctx->net;
+ par->table = ctx->table->name;
+ switch (ctx->family) {
+ case AF_INET:
+ entry->e4.ip.proto = proto;
+ entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+ break;
+ case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
+ entry->e6.ipv6.proto = proto;
+ entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+ break;
+ case NFPROTO_BRIDGE:
+ entry->ebt.ethproto = (__force __be16)proto;
+ entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+ break;
+ case NFPROTO_ARP:
+ break;
+ }
+ par->entryinfo = entry;
+ par->match = match;
+ par->matchinfo = info;
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ } else {
+ par->hook_mask = 0;
+ }
+ par->family = ctx->family;
+ par->nft_compat = true;
+}
+
+static void match_compat_from_user(struct xt_match *m, void *in, void *out)
+{
+ int pad;
+
+ memcpy(out, in, m->matchsize);
+ pad = XT_ALIGN(m->matchsize) - m->matchsize;
+ if (pad > 0)
+ memset(out + m->matchsize, 0, pad);
+}
+
+static int
+__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[],
+ void *info)
+{
+ struct xt_match *match = expr->ops->data;
+ struct xt_mtchk_param par;
+ size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+ u16 proto = 0;
+ bool inv = false;
+ union nft_entry e = {};
+ int ret;
+
+ match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
+
+ if (ctx->nla[NFTA_RULE_COMPAT]) {
+ ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
+ if (ret < 0)
+ return ret;
+ }
+
+ nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
+ nft_compat_wait_for_destructors();
+
+ return xt_check_match(&par, size, proto, inv);
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
+}
+
+static int
+nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+ struct xt_match *m = expr->ops->data;
+ int ret;
+
+ priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+ if (!priv->info)
+ return -ENOMEM;
+
+ ret = __nft_match_init(ctx, expr, tb, priv->info);
+ if (ret)
+ kfree(priv->info);
+ return ret;
+}
+
+static void
+__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ void *info)
+{
+ struct xt_match *match = expr->ops->data;
+ struct module *me = match->me;
+ struct xt_mtdtor_param par;
+
+ par.net = ctx->net;
+ par.match = match;
+ par.matchinfo = info;
+ par.family = ctx->family;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
+
+ __nft_mt_tg_destroy(me, expr);
+}
+
+static void
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
+}
+
+static void
+nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+ __nft_match_destroy(ctx, expr, priv->info);
+ kfree(priv->info);
+}
+
+static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
+ void *info)
+{
+ struct xt_match *match = expr->ops->data;
+
+ if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
+ nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
+ nft_extension_dump_info(skb, NFTA_MATCH_INFO, info,
+ match->matchsize, match->usersize))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ return __nft_match_dump(skb, expr, nft_expr_priv(expr));
+}
+
+static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(e);
+
+ return __nft_match_dump(skb, e, priv->info);
+}
+
+static int nft_match_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct xt_match *match = expr->ops->data;
+ unsigned int hook_mask = 0;
+ int ret;
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (match->hooks && !(hook_mask & match->hooks))
+ return -EINVAL;
+
+ ret = nft_compat_chain_validate_dependency(ctx, match->table);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+ int event, u16 family, const char *name,
+ int rev, int target)
+{
+ struct nlmsghdr *nlh;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event);
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nlmsg_failure;
+
+ if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
+ nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
+ nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int nfnl_compat_get_rcu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const tb[])
+{
+ u8 family = info->nfmsg->nfgen_family;
+ const char *name, *fmt;
+ struct sk_buff *skb2;
+ int ret = 0, target;
+ u32 rev;
+
+ if (tb[NFTA_COMPAT_NAME] == NULL ||
+ tb[NFTA_COMPAT_REV] == NULL ||
+ tb[NFTA_COMPAT_TYPE] == NULL)
+ return -EINVAL;
+
+ name = nla_data(tb[NFTA_COMPAT_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
+ target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
+
+	switch (family) {
+ case AF_INET:
+ fmt = "ipt_%s";
+ break;
+ case AF_INET6:
+ fmt = "ip6t_%s";
+ break;
+ case NFPROTO_BRIDGE:
+ fmt = "ebt_%s";
+ break;
+ case NFPROTO_ARP:
+ fmt = "arpt_%s";
+ break;
+ default:
+ pr_err("nft_compat: unsupported protocol %d\n", family);
+ return -EINVAL;
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
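+	/* xt_find_revision() may need to autoload a module, which can sleep,
+	 * so leave the RCU read section for the lookup; taking a reference
+	 * on this module above keeps it alive meanwhile.
+	 */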
+ rcu_read_unlock();
+ try_then_request_module(xt_find_revision(family, name, rev, target, &ret),
+ fmt, name);
+ if (ret < 0)
+ goto out_put;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ /* include the best revision for this extension in the message */
+ if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ NFNL_MSG_COMPAT_GET,
+ family, name, ret, target) <= 0) {
+ kfree_skb(skb2);
+ goto out_put;
+ }
+
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+out_put:
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ return ret;
+}
+
+static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
+ [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
+ .len = NFT_COMPAT_NAME_MAX-1 },
+ [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
+};
+
+static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
+ [NFNL_MSG_COMPAT_GET] = {
+ .call = nfnl_compat_get_rcu,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_COMPAT_MAX,
+ .policy = nfnl_compat_policy_get
+ },
+};
+
+static const struct nfnetlink_subsystem nfnl_compat_subsys = {
+ .name = "nft-compat",
+ .subsys_id = NFNL_SUBSYS_NFT_COMPAT,
+ .cb_count = NFNL_MSG_COMPAT_MAX,
+ .cb = nfnl_nft_compat_cb,
+};
+
+static struct nft_expr_type nft_match_type;
+
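+/* The xt "comment" match carries annotation only and has no runtime effect,
+ * so it is the one compat match that may be elided during rule reduction.
+ */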
+static bool nft_match_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct xt_match *match = expr->ops->data;
+
+ return strcmp(match->name, "comment") == 0;
+}
+
+static const struct nft_expr_ops *
+nft_match_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ struct nft_expr_ops *ops;
+ struct xt_match *match;
+ unsigned int matchsize;
+ char *mt_name;
+ u32 rev, family;
+ int err;
+
+ if (tb[NFTA_MATCH_NAME] == NULL ||
+ tb[NFTA_MATCH_REV] == NULL ||
+ tb[NFTA_MATCH_INFO] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ mt_name = nla_data(tb[NFTA_MATCH_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
+ family = ctx->family;
+
+ match = xt_request_find_match(family, mt_name, rev);
+ if (IS_ERR(match))
+ return ERR_PTR(-ENOENT);
+
+ if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ ops->type = &nft_match_type;
+ ops->eval = nft_match_eval;
+ ops->init = nft_match_init;
+ ops->destroy = nft_match_destroy;
+ ops->dump = nft_match_dump;
+ ops->validate = nft_match_validate;
+ ops->data = match;
+ ops->reduce = nft_match_reduce;
+
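+	/* Matches with large ->matchsize would bloat the expression; store
+	 * their info out of line via struct nft_xt_match_priv and switch to
+	 * the *_large handlers (see NFT_MATCH_LARGE_THRESH above).
+	 */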
+ matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
+ if (matchsize > NFT_MATCH_LARGE_THRESH) {
+ matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
+
+ ops->eval = nft_match_large_eval;
+ ops->init = nft_match_large_init;
+ ops->destroy = nft_match_large_destroy;
+ ops->dump = nft_match_large_dump;
+ }
+
+ ops->size = matchsize;
+
+ return ops;
+err:
+ module_put(match->me);
+ return ERR_PTR(err);
+}
+
+static void nft_match_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_match *match = ops->data;
+
+ module_put(match->me);
+ kfree(ops);
+}
+
+static struct nft_expr_type nft_match_type __read_mostly = {
+ .name = "match",
+ .select_ops = nft_match_select_ops,
+ .release_ops = nft_match_release_ops,
+ .policy = nft_match_policy,
+ .maxattr = NFTA_MATCH_MAX,
+ .owner = THIS_MODULE,
+};
+
+static struct nft_expr_type nft_target_type;
+
+static const struct nft_expr_ops *
+nft_target_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ struct nft_expr_ops *ops;
+ struct xt_target *target;
+ char *tg_name;
+ u32 rev, family;
+ int err;
+
+ if (tb[NFTA_TARGET_NAME] == NULL ||
+ tb[NFTA_TARGET_REV] == NULL ||
+ tb[NFTA_TARGET_INFO] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ tg_name = nla_data(tb[NFTA_TARGET_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
+ family = ctx->family;
+
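+	/* The standard and error targets are expressed natively as nf_tables
+	 * verdicts and must not be wrapped by this expression.
+	 */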
+ if (strcmp(tg_name, XT_ERROR_TARGET) == 0 ||
+ strcmp(tg_name, XT_STANDARD_TARGET) == 0 ||
+ strcmp(tg_name, "standard") == 0)
+ return ERR_PTR(-EINVAL);
+
+ target = xt_request_find_target(family, tg_name, rev);
+ if (IS_ERR(target))
+ return ERR_PTR(-ENOENT);
+
+ if (!target->target) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ ops->type = &nft_target_type;
+ ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
+ ops->init = nft_target_init;
+ ops->destroy = nft_target_destroy;
+ ops->dump = nft_target_dump;
+ ops->validate = nft_target_validate;
+ ops->data = target;
+ ops->reduce = NFT_REDUCE_READONLY;
+
+ if (family == NFPROTO_BRIDGE)
+ ops->eval = nft_target_eval_bridge;
+ else
+ ops->eval = nft_target_eval_xt;
+
+ return ops;
+err:
+ module_put(target->me);
+ return ERR_PTR(err);
+}
+
+static void nft_target_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_target *target = ops->data;
+
+ module_put(target->me);
+ kfree(ops);
+}
+
+static struct nft_expr_type nft_target_type __read_mostly = {
+ .name = "target",
+ .select_ops = nft_target_select_ops,
+ .release_ops = nft_target_release_ops,
+ .policy = nft_target_policy,
+ .maxattr = NFTA_TARGET_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_compat_module_init(void)
+{
+ int ret;
+
+ ret = nft_register_expr(&nft_match_type);
+ if (ret < 0)
+ return ret;
+
+ ret = nft_register_expr(&nft_target_type);
+ if (ret < 0)
+ goto err_match;
+
+ ret = nfnetlink_subsys_register(&nfnl_compat_subsys);
+ if (ret < 0) {
+ pr_err("nft_compat: cannot register with nfnetlink.\n");
+ goto err_target;
+ }
+
+ return ret;
+err_target:
+ nft_unregister_expr(&nft_target_type);
+err_match:
+ nft_unregister_expr(&nft_match_type);
+ return ret;
+}
+
+static void __exit nft_compat_module_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfnl_compat_subsys);
+ nft_unregister_expr(&nft_target_type);
+ nft_unregister_expr(&nft_match_type);
+}
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
+
+module_init(nft_compat_module_init);
+module_exit(nft_compat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("match");
+MODULE_ALIAS_NFT_EXPR("target");
+MODULE_DESCRIPTION("x_tables over nftables support");
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
new file mode 100644
index 000000000..d657f999a
--- /dev/null
+++ b/net/netfilter/nft_connlimit.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+struct nft_connlimit {
+ struct nf_conncount_list *list;
+ u32 limit;
+ bool invert;
+};
+
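+/* Add this connection to the expression's conncount list and compare the
+ * resulting count against the configured limit: the rule only continues
+ * matching while the count is within the limit (or, with
+ * NFT_CONNLIMIT_F_INV, once it exceeds it); otherwise NFT_BREAK ends
+ * evaluation of this rule. Failure to obtain a tuple or to track the
+ * connection drops the packet. A rough frontend sketch (assuming nft's
+ * "ct count" syntax maps to this expression):
+ *
+ *   nft add rule inet filter input tcp dport 22 ct count over 10 reject
+ */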
+static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ const struct nft_set_ext *ext)
+{
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+ const struct nf_conntrack_tuple *tuple_ptr;
+ struct nf_conntrack_tuple tuple;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int count;
+
+ tuple_ptr = &tuple;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (ct != NULL) {
+ tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ zone = nf_ct_zone(ct);
+ } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
+ nft_pf(pkt), nft_net(pkt), &tuple)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ count = priv->list->count;
+
+ if ((count > priv->limit) ^ priv->invert) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+}
+
+static int nft_connlimit_do_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_connlimit *priv)
+{
+ bool invert = false;
+ u32 flags, limit;
+ int err;
+
+ if (!tb[NFTA_CONNLIMIT_COUNT])
+ return -EINVAL;
+
+ limit = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_COUNT]));
+
+ if (tb[NFTA_CONNLIMIT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_FLAGS]));
+ if (flags & ~NFT_CONNLIMIT_F_INV)
+ return -EOPNOTSUPP;
+ if (flags & NFT_CONNLIMIT_F_INV)
+ invert = true;
+ }
+
+ priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
+ if (!priv->list)
+ return -ENOMEM;
+
+ nf_conncount_list_init(priv->list);
+ priv->limit = limit;
+ priv->invert = invert;
+
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_netns;
+
+ return 0;
+err_netns:
+ kfree(priv->list);
+
+ return err;
+}
+
+static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
+ struct nft_connlimit *priv)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+ nf_conncount_cache_free(priv->list);
+ kfree(priv->list);
+}
+
+static int nft_connlimit_do_dump(struct sk_buff *skb,
+ struct nft_connlimit *priv)
+{
+ if (nla_put_be32(skb, NFTA_CONNLIMIT_COUNT, htonl(priv->limit)))
+ goto nla_put_failure;
+ if (priv->invert &&
+ nla_put_be32(skb, NFTA_CONNLIMIT_FLAGS, htonl(NFT_CONNLIMIT_F_INV)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static inline void nft_connlimit_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ nft_connlimit_do_eval(priv, regs, pkt, NULL);
+}
+
+static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ return nft_connlimit_do_init(ctx, tb, priv);
+}
+
+static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ nft_connlimit_do_destroy(ctx, priv);
+}
+
+static int nft_connlimit_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ return nft_connlimit_do_dump(skb, priv);
+}
+
+static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = {
+ [NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 },
+ [NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 },
+};
+
+static struct nft_object_type nft_connlimit_obj_type;
+static const struct nft_object_ops nft_connlimit_obj_ops = {
+ .type = &nft_connlimit_obj_type,
+ .size = sizeof(struct nft_connlimit),
+ .eval = nft_connlimit_obj_eval,
+ .init = nft_connlimit_obj_init,
+ .destroy = nft_connlimit_obj_destroy,
+ .dump = nft_connlimit_obj_dump,
+};
+
+static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CONNLIMIT,
+ .ops = &nft_connlimit_obj_ops,
+ .maxattr = NFTA_CONNLIMIT_MAX,
+ .policy = nft_connlimit_policy,
+ .owner = THIS_MODULE,
+};
+
+static void nft_connlimit_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nft_connlimit_do_eval(priv, regs, pkt, NULL);
+}
+
+static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ return nft_connlimit_do_dump(skb, priv);
+}
+
+static int nft_connlimit_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ return nft_connlimit_do_init(ctx, tb, priv);
+}
+
+static void nft_connlimit_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nft_connlimit_do_destroy(ctx, priv);
+}
+
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_connlimit *priv_dst = nft_expr_priv(dst);
+ struct nft_connlimit *priv_src = nft_expr_priv(src);
+
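+	/* Clones are made from the packet path when a stateful expression is
+	 * copied into a dynamic set element, hence the atomic allocation.
+	 */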
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ if (!priv_dst->list)
+ return -ENOMEM;
+
+ nf_conncount_list_init(priv_dst->list);
+ priv_dst->limit = priv_src->limit;
+ priv_dst->invert = priv_src->invert;
+
+ return 0;
+}
+
+static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nf_conncount_cache_free(priv->list);
+ kfree(priv->list);
+}
+
+static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+ bool ret;
+
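+	/* The conncount list is also mutated from the packet path in softirq
+	 * context; disable BH so the gc walk cannot race or deadlock with it
+	 * on the local CPU.
+	 */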
+ local_bh_disable();
+ ret = nf_conncount_gc_list(net, priv->list);
+ local_bh_enable();
+
+ return ret;
+}
+
+static struct nft_expr_type nft_connlimit_type;
+static const struct nft_expr_ops nft_connlimit_ops = {
+ .type = &nft_connlimit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_connlimit)),
+ .eval = nft_connlimit_eval,
+ .init = nft_connlimit_init,
+ .destroy = nft_connlimit_destroy,
+ .clone = nft_connlimit_clone,
+ .destroy_clone = nft_connlimit_destroy_clone,
+ .dump = nft_connlimit_dump,
+ .gc = nft_connlimit_gc,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_connlimit_type __read_mostly = {
+ .name = "connlimit",
+ .ops = &nft_connlimit_ops,
+ .policy = nft_connlimit_policy,
+ .maxattr = NFTA_CONNLIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL | NFT_EXPR_GC,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_connlimit_module_init(void)
+{
+ int err;
+
+ err = nft_register_obj(&nft_connlimit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_connlimit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_connlimit_obj_type);
+ return err;
+}
+
+static void __exit nft_connlimit_module_exit(void)
+{
+ nft_unregister_expr(&nft_connlimit_type);
+ nft_unregister_obj(&nft_connlimit_obj_type);
+}
+
+module_init(nft_connlimit_module_init);
+module_exit(nft_connlimit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso");
+MODULE_ALIAS_NFT_EXPR("connlimit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
+MODULE_DESCRIPTION("nftables connlimit rule support");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 000000000..f4d3573e8
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables_offload.h>
+
+struct nft_counter {
+ s64 bytes;
+ s64 packets;
+};
+
+struct nft_counter_percpu_priv {
+ struct nft_counter __percpu *counter;
+};
+
+static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+
+static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter *this_cpu;
+ seqcount_t *myseq;
+
+ local_bh_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ myseq = this_cpu_ptr(&nft_counter_seq);
+
+ write_seqcount_begin(myseq);
+
+ this_cpu->bytes += pkt->skb->len;
+ this_cpu->packets++;
+
+ write_seqcount_end(myseq);
+ local_bh_enable();
+}
+
+static inline void nft_counter_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_do_init(const struct nlattr * const tb[],
+ struct nft_counter_percpu_priv *priv)
+{
+ struct nft_counter __percpu *cpu_stats;
+ struct nft_counter *this_cpu;
+
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ if (tb[NFTA_COUNTER_PACKETS]) {
+ this_cpu->packets =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ }
+ if (tb[NFTA_COUNTER_BYTES]) {
+ this_cpu->bytes =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ }
+ preempt_enable();
+ priv->counter = cpu_stats;
+ return 0;
+}
+
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_init(tb, priv);
+}
+
+static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
+{
+ free_percpu(priv->counter);
+}
+
+static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_destroy(priv);
+}
+
+static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
+ struct nft_counter *total)
+{
+ struct nft_counter *this_cpu;
+
+ local_bh_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ this_cpu->packets -= total->packets;
+ this_cpu->bytes -= total->bytes;
+ local_bh_enable();
+}
+
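+/* Sum the per-cpu counters. Writers update their local counter inside a
+ * per-cpu seqcount write section (with BH disabled), so the reader retries
+ * a CPU's sample until it observes a consistent bytes/packets pair without
+ * taking any lock.
+ */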
+static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
+ struct nft_counter *total)
+{
+ struct nft_counter *this_cpu;
+ const seqcount_t *myseq;
+ u64 bytes, packets;
+ unsigned int seq;
+ int cpu;
+
+ memset(total, 0, sizeof(*total));
+ for_each_possible_cpu(cpu) {
+ myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ this_cpu = per_cpu_ptr(priv->counter, cpu);
+ do {
+ seq = read_seqcount_begin(myseq);
+ bytes = this_cpu->bytes;
+ packets = this_cpu->packets;
+ } while (read_seqcount_retry(myseq, seq));
+
+ total->bytes += bytes;
+ total->packets += packets;
+ }
+}
+
+static int nft_counter_do_dump(struct sk_buff *skb,
+ struct nft_counter_percpu_priv *priv,
+ bool reset)
+{
+ struct nft_counter total;
+
+ nft_counter_fetch(priv, &total);
+
+ if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
+ NFTA_COUNTER_PAD) ||
+ nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets),
+ NFTA_COUNTER_PAD))
+ goto nla_put_failure;
+
+ if (reset)
+ nft_counter_reset(priv, &total);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_counter_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_dump(skb, priv, reset);
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+ [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
+ [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
+};
+
+struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+ .type = &nft_counter_obj_type,
+ .size = sizeof(struct nft_counter_percpu_priv),
+ .eval = nft_counter_obj_eval,
+ .init = nft_counter_obj_init,
+ .destroy = nft_counter_obj_destroy,
+ .dump = nft_counter_obj_dump,
+};
+
+struct nft_object_type nft_counter_obj_type __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .ops = &nft_counter_obj_ops,
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
+ .owner = THIS_MODULE,
+};
+
+void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ return nft_counter_do_dump(skb, priv, false);
+}
+
+static int nft_counter_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ return nft_counter_do_init(tb, priv);
+}
+
+static void nft_counter_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ nft_counter_do_destroy(priv);
+}
+
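+/* A clone starts from the totals of the source counter, folded into a
+ * single CPU's slot of a freshly allocated per-cpu counter; the remaining
+ * CPUs start at zero, so the summed value is preserved.
+ */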
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+ struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+ struct nft_counter __percpu *cpu_stats;
+ struct nft_counter *this_cpu;
+ struct nft_counter total;
+
+ nft_counter_fetch(priv, &total);
+
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu->packets = total.packets;
+ this_cpu->bytes = total.bytes;
+ preempt_enable();
+
+ priv_clone->counter = cpu_stats;
+ return 0;
+}
+
+static int nft_counter_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ /* No specific offload action is needed, but report success. */
+ return 0;
+}
+
+static void nft_counter_offload_stats(struct nft_expr *expr,
+ const struct flow_stats *stats)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter *this_cpu;
+ seqcount_t *myseq;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ myseq = this_cpu_ptr(&nft_counter_seq);
+
+ write_seqcount_begin(myseq);
+ this_cpu->packets += stats->pkts;
+ this_cpu->bytes += stats->bytes;
+ write_seqcount_end(myseq);
+ preempt_enable();
+}
+
+void nft_counter_init_seqcount(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+}
+
+struct nft_expr_type nft_counter_type;
+static const struct nft_expr_ops nft_counter_ops = {
+ .type = &nft_counter_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
+ .eval = nft_counter_eval,
+ .init = nft_counter_init,
+ .destroy = nft_counter_destroy,
+ .destroy_clone = nft_counter_destroy,
+ .dump = nft_counter_dump,
+ .clone = nft_counter_clone,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_counter_offload,
+ .offload_stats = nft_counter_offload_stats,
+};
+
+struct nft_expr_type nft_counter_type __read_mostly = {
+ .name = "counter",
+ .ops = &nft_counter_ops,
+ .policy = nft_counter_policy,
+ .maxattr = NFTA_COUNTER_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 000000000..641dc21f9
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,1401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+struct nft_ct {
+ enum nft_ct_keys key:8;
+ enum ip_conntrack_dir dir:8;
+ u8 len;
+ union {
+ u8 dreg;
+ u8 sreg;
+ };
+};
+
+struct nft_ct_helper_obj {
+ struct nf_conntrack_helper *helper4;
+ struct nf_conntrack_helper *helper6;
+ u8 l4proto;
+};
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
+static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+static DEFINE_MUTEX(nft_ct_pcpu_mutex);
+#endif
+
+static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
+ enum nft_ct_keys k,
+ enum ip_conntrack_dir d)
+{
+ if (d < IP_CT_DIR_MAX)
+ return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
+ atomic64_read(&c[d].packets);
+
+ return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
+ nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
+}
+
+static void nft_ct_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_tuple *tuple;
+ const struct nf_conntrack_helper *helper;
+ unsigned int state;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+
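+	/* NFT_CT_STATE is special: it must yield "invalid" or "untracked"
+	 * even when no conntrack entry is attached, so handle it before the
+	 * ct == NULL check below.
+	 */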
+ switch (priv->key) {
+ case NFT_CT_STATE:
+ if (ct)
+ state = NF_CT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ state = NF_CT_STATE_UNTRACKED_BIT;
+ else
+ state = NF_CT_STATE_INVALID_BIT;
+ *dest = state;
+ return;
+ default:
+ break;
+ }
+
+ if (ct == NULL)
+ goto err;
+
+ switch (priv->key) {
+ case NFT_CT_DIRECTION:
+ nft_reg_store8(dest, CTINFO2DIR(ctinfo));
+ return;
+ case NFT_CT_STATUS:
+ *dest = ct->status;
+ return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ *dest = READ_ONCE(ct->mark);
+ return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ *dest = ct->secmark;
+ return;
+#endif
+ case NFT_CT_EXPIRATION:
+ *dest = jiffies_to_msecs(nf_ct_expires(ct));
+ return;
+ case NFT_CT_HELPER:
+ if (ct->master == NULL)
+ goto err;
+ help = nfct_help(ct->master);
+ if (help == NULL)
+ goto err;
+ helper = rcu_dereference(help->helper);
+ if (helper == NULL)
+ goto err;
+ strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
+ return;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS: {
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+
+ if (labels)
+ memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
+ else
+ memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
+ return;
+ }
+#endif
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS: {
+ const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+ u64 count = 0;
+
+ if (acct)
+ count = nft_ct_get_eval_counter(acct->counter,
+ priv->key, priv->dir);
+ memcpy(dest, &count, sizeof(count));
+ return;
+ }
+ case NFT_CT_AVGPKT: {
+ const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+ u64 avgcnt = 0, bcnt = 0, pcnt = 0;
+
+ if (acct) {
+ pcnt = nft_ct_get_eval_counter(acct->counter,
+ NFT_CT_PKTS, priv->dir);
+ bcnt = nft_ct_get_eval_counter(acct->counter,
+ NFT_CT_BYTES, priv->dir);
+ if (pcnt != 0)
+ avgcnt = div64_u64(bcnt, pcnt);
+ }
+
+ memcpy(dest, &avgcnt, sizeof(avgcnt));
+ return;
+ }
+ case NFT_CT_L3PROTOCOL:
+ nft_reg_store8(dest, nf_ct_l3num(ct));
+ return;
+ case NFT_CT_PROTOCOL:
+ nft_reg_store8(dest, nf_ct_protonum(ct));
+ return;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE: {
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+ u16 zoneid;
+
+ if (priv->dir < IP_CT_DIR_MAX)
+ zoneid = nf_ct_zone_id(zone, priv->dir);
+ else
+ zoneid = zone->id;
+
+ nft_reg_store16(dest, zoneid);
+ return;
+ }
+#endif
+ case NFT_CT_ID:
+ *dest = nf_ct_get_id(ct);
+ return;
+ default:
+ break;
+ }
+
+ tuple = &ct->tuplehash[priv->dir].tuple;
+ switch (priv->key) {
+ case NFT_CT_SRC:
+ memcpy(dest, tuple->src.u3.all,
+ nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ return;
+ case NFT_CT_DST:
+ memcpy(dest, tuple->dst.u3.all,
+ nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ return;
+ case NFT_CT_PROTO_SRC:
+ nft_reg_store16(dest, (__force u16)tuple->src.u.all);
+ return;
+ case NFT_CT_PROTO_DST:
+ nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
+ return;
+ case NFT_CT_SRC_IP:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+ goto err;
+ *dest = (__force __u32)tuple->src.u3.ip;
+ return;
+ case NFT_CT_DST_IP:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+ goto err;
+ *dest = (__force __u32)tuple->dst.u3.ip;
+ return;
+ case NFT_CT_SRC_IP6:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ goto err;
+ memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
+ return;
+ case NFT_CT_DST_IP6:
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ goto err;
+ memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
+ return;
+ default:
+ break;
+ }
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ u16 value = nft_reg_load16(&regs->data[priv->sreg]);
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) /* already tracked */
+ return;
+
+ zone.id = value;
+
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ zone.dir = NF_CT_ZONE_DIR_ORIG;
+ break;
+ case IP_CT_DIR_REPLY:
+ zone.dir = NF_CT_ZONE_DIR_REPL;
+ break;
+ default:
+ break;
+ }
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+ if (likely(refcount_read(&ct->ct_general.use) == 1)) {
+ refcount_inc(&ct->ct_general.use);
+ nf_ct_zone_add(ct, &zone);
+ } else {
+		/* the previous skb got queued to userspace; allocate a
+		 * temporary template until the percpu one can be reused.
+		 */
+ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
+ if (!ct) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ }
+
+ nf_ct_set(skb, ct, IP_CT_NEW);
+}
+#endif
+
+static void nft_ct_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
+ u32 value = regs->data[priv->sreg];
+#endif
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL || nf_ct_is_template(ct))
+ return;
+
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ if (READ_ONCE(ct->mark) != value) {
+ WRITE_ONCE(ct->mark, value);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ if (ct->secmark != value) {
+ ct->secmark = value;
+ nf_conntrack_event_cache(IPCT_SECMARK, ct);
+ }
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_replace(ct,
+ &regs->data[priv->sreg],
+ &regs->data[priv->sreg],
+ NF_CT_LABELS_MAX_SIZE / sizeof(u32));
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ case NFT_CT_EVENTMASK: {
+ struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
+ u32 ctmask = regs->data[priv->sreg];
+
+ if (e) {
+ if (e->ctmask != ctmask)
+ e->ctmask = ctmask;
+ break;
+ }
+
+ if (ctmask && !nf_ct_is_confirmed(ct))
+ nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+}
+
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+ [NFTA_CT_DREG] = { .type = NLA_U32 },
+ [NFTA_CT_KEY] = { .type = NLA_U32 },
+ [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
+ [NFTA_CT_SREG] = { .type = NLA_U32 },
+};
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_tmpl_put_pcpu(void)
+{
+ struct nf_conn *ct;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ct = per_cpu(nft_ct_pcpu_template, cpu);
+ if (!ct)
+ break;
+ nf_ct_put(ct);
+ per_cpu(nft_ct_pcpu_template, cpu) = NULL;
+ }
+}
+
+static bool nft_ct_tmpl_alloc_pcpu(void)
+{
+ struct nf_conntrack_zone zone = { .id = 0 };
+ struct nf_conn *tmp;
+ int cpu;
+
+ if (nft_ct_pcpu_template_refcnt)
+ return true;
+
+ for_each_possible_cpu(cpu) {
+ tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
+ if (!tmp) {
+ nft_ct_tmpl_put_pcpu();
+ return false;
+ }
+
+ per_cpu(nft_ct_pcpu_template, cpu) = tmp;
+ }
+
+ return true;
+}
+#endif
+
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ priv->dir = IP_CT_DIR_MAX;
+ switch (priv->key) {
+ case NFT_CT_DIRECTION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
+ case NFT_CT_STATE:
+ case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+#endif
+ case NFT_CT_EXPIRATION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ break;
+#endif
+ case NFT_CT_HELPER:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = NF_CT_HELPER_NAME_LEN;
+ break;
+
+ case NFT_CT_L3PROTOCOL:
+ case NFT_CT_PROTOCOL:
+		/* For compatibility, do not report an error if the
+		 * NFTA_CT_DIRECTION attribute is specified.
+		 */
+ len = sizeof(u8);
+ break;
+ case NFT_CT_SRC:
+ case NFT_CT_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ len = sizeof_field(struct nf_conntrack_tuple,
+ src.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ len = sizeof_field(struct nf_conntrack_tuple,
+ src.u3.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ break;
+ case NFT_CT_SRC_IP:
+ case NFT_CT_DST_IP:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip);
+ break;
+ case NFT_CT_SRC_IP6:
+ case NFT_CT_DST_IP6:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip6);
+ break;
+ case NFT_CT_PROTO_SRC:
+ case NFT_CT_PROTO_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+ len = sizeof_field(struct nf_conntrack_tuple, src.u.all);
+ break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ case NFT_CT_AVGPKT:
+ len = sizeof(u64);
+ break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ len = sizeof(u16);
+ break;
+#endif
+ case NFT_CT_ID:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ priv->len = len;
+ err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+ if (err < 0)
+ return err;
+
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ return err;
+
+ if (priv->key == NFT_CT_BYTES ||
+ priv->key == NFT_CT_PKTS ||
+ priv->key == NFT_CT_AVGPKT)
+ nf_ct_set_acct(ctx->net, true);
+
+ return 0;
+}
+
+static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
+{
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ mutex_lock(&nft_ct_pcpu_mutex);
+ if (--nft_ct_pcpu_template_refcnt == 0)
+ nft_ct_tmpl_put_pcpu();
+ mutex_unlock(&nft_ct_pcpu_mutex);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ priv->dir = IP_CT_DIR_MAX;
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = sizeof_field(struct nf_conn, mark);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
+ if (err)
+ return err;
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ mutex_lock(&nft_ct_pcpu_mutex);
+ if (!nft_ct_tmpl_alloc_pcpu()) {
+ mutex_unlock(&nft_ct_pcpu_mutex);
+ return -ENOMEM;
+ }
+ nft_ct_pcpu_template_refcnt++;
+ mutex_unlock(&nft_ct_pcpu_mutex);
+ len = sizeof(u16);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ case NFT_CT_EVENTMASK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_CT_DIRECTION]) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ err = -EINVAL;
+ goto err1;
+ }
+ }
+
+ priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
+ if (err < 0)
+ goto err1;
+
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+
+err1:
+ __nft_ct_set_destroy(ctx, priv);
+ return err;
+}
+
+static void nft_ct_get_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static void nft_ct_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_ct *priv = nft_expr_priv(expr);
+
+ __nft_ct_set_destroy(ctx, priv);
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_SRC:
+ case NFT_CT_DST:
+ case NFT_CT_SRC_IP:
+ case NFT_CT_DST_IP:
+ case NFT_CT_SRC_IP6:
+ case NFT_CT_DST_IP6:
+ case NFT_CT_PROTO_SRC:
+ case NFT_CT_PROTO_DST:
+ if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ case NFT_CT_AVGPKT:
+ case NFT_CT_ZONE:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_ct_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ const struct nft_ct *ct;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ ct = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != ct->key) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_ZONE:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_ct_type;
+static const struct nft_expr_ops nft_ct_get_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_get_eval,
+ .init = nft_ct_get_init,
+ .destroy = nft_ct_get_destroy,
+ .dump = nft_ct_get_dump,
+ .reduce = nft_ct_get_reduce,
+};
+
+static bool nft_ct_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_ct_get_ops)
+ continue;
+
+ __nft_reg_track_cancel(track, i);
+ }
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_ct_set_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_set_eval,
+ .init = nft_ct_set_init,
+ .destroy = nft_ct_set_destroy,
+ .dump = nft_ct_set_dump,
+ .reduce = nft_ct_set_reduce,
+};
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static const struct nft_expr_ops nft_ct_set_zone_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_set_zone_eval,
+ .init = nft_ct_set_init,
+ .destroy = nft_ct_set_destroy,
+ .dump = nft_ct_set_dump,
+ .reduce = nft_ct_set_reduce,
+};
+#endif
+
+static const struct nft_expr_ops *
+nft_ct_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_CT_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_CT_DREG])
+ return &nft_ct_get_ops;
+
+ if (tb[NFTA_CT_SREG]) {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
+ return &nft_ct_set_zone_ops;
+#endif
+ return &nft_ct_set_ops;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ct_type __read_mostly = {
+ .name = "ct",
+ .select_ops = nft_ct_select_ops,
+ .policy = nft_ct_policy,
+ .maxattr = NFTA_CT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static void nft_notrack_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ /* Previously seen (loopback or untracked)? Ignore. */
+ if (ct || ctinfo == IP_CT_UNTRACKED)
+ return;
+
+ nf_ct_set(skb, ct, IP_CT_UNTRACKED);
+}
+
+static struct nft_expr_type nft_notrack_type;
+static const struct nft_expr_ops nft_notrack_ops = {
+ .type = &nft_notrack_type,
+ .size = NFT_EXPR_SIZE(0),
+ .eval = nft_notrack_eval,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_notrack_type __read_mostly = {
+ .name = "notrack",
+ .ops = &nft_notrack_ops,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+static int
+nft_ct_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
+ struct net *net, const struct nlattr *attr)
+{
+ struct nlattr **tb;
+ int ret = 0;
+
+ tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+ GFP_KERNEL);
+
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nla_parse_nested_deprecated(tb,
+ l4proto->ctnl_timeout.nlattr_max,
+ attr,
+ l4proto->ctnl_timeout.nla_policy,
+ NULL);
+ if (ret < 0)
+ goto err;
+
+ ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+
+err:
+ kfree(tb);
+ return ret;
+}
+
+struct nft_ct_timeout_obj {
+ struct nf_ct_timeout *timeout;
+ u8 l4proto;
+};
+
+static void nft_ct_timeout_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conn_timeout *timeout;
+ const unsigned int *values;
+
+ if (priv->l4proto != pkt->tprot)
+ return;
+
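+	/* A timeout policy can only be attached while the conntrack entry
+	 * is still unconfirmed; template entries are skipped as well.
+	 */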
+ if (!ct || nf_ct_is_template(ct) || nf_ct_is_confirmed(ct))
+ return;
+
+ timeout = nf_ct_timeout_find(ct);
+ if (!timeout) {
+ timeout = nf_ct_timeout_ext_add(ct, priv->timeout, GFP_ATOMIC);
+ if (!timeout) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ }
+
+ rcu_assign_pointer(timeout->timeout, priv->timeout);
+
+ /* adjust the timeout as per 'new' state. ct is unconfirmed,
+ * so the current timestamp must not be added.
+ */
+ values = nf_ct_timeout_data(timeout);
+ if (values)
+ nf_ct_refresh(ct, pkt->skb, values[0]);
+}
+
+static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nf_ct_timeout *timeout;
+ int l3num = ctx->family;
+ __u8 l4num;
+ int ret;
+
+ if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
+ !tb[NFTA_CT_TIMEOUT_DATA])
+ return -EINVAL;
+
+ if (tb[NFTA_CT_TIMEOUT_L3PROTO])
+ l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
+
+ l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
+ priv->l4proto = l4num;
+
+ l4proto = nf_ct_l4proto_find(l4num);
+
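+	/* nf_ct_l4proto_find() falls back to the generic protocol tracker;
+	 * reject protocols without specific timeout support.
+	 */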
+ if (l4proto->l4proto != l4num) {
+ ret = -EOPNOTSUPP;
+ goto err_proto_put;
+ }
+
+ timeout = kzalloc(sizeof(struct nf_ct_timeout) +
+ l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
+ if (timeout == NULL) {
+ ret = -ENOMEM;
+ goto err_proto_put;
+ }
+
+ ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net,
+ tb[NFTA_CT_TIMEOUT_DATA]);
+ if (ret < 0)
+ goto err_free_timeout;
+
+ timeout->l3num = l3num;
+ timeout->l4proto = l4proto;
+
+ ret = nf_ct_netns_get(ctx->net, ctx->family);
+ if (ret < 0)
+ goto err_free_timeout;
+
+ priv->timeout = timeout;
+ return 0;
+
+err_free_timeout:
+ kfree(timeout);
+err_proto_put:
+ return ret;
+}
+
+static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+ struct nf_ct_timeout *timeout = priv->timeout;
+
+ nf_ct_untimeout(ctx->net, timeout);
+ nf_ct_netns_put(ctx->net, ctx->family);
+ kfree(priv->timeout);
+}
+
+static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+ const struct nf_ct_timeout *timeout = priv->timeout;
+ struct nlattr *nest_params;
+ int ret;
+
+ if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
+ nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
+ return -1;
+
+ nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA);
+ if (!nest_params)
+ return -1;
+
+ ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
+ if (ret < 0)
+ return -1;
+ nla_nest_end(skb, nest_params);
+ return 0;
+}
+
+static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = {
+ [NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 },
+ [NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 },
+ [NFTA_CT_TIMEOUT_DATA] = {.type = NLA_NESTED },
+};
+
+static struct nft_object_type nft_ct_timeout_obj_type;
+
+static const struct nft_object_ops nft_ct_timeout_obj_ops = {
+ .type = &nft_ct_timeout_obj_type,
+ .size = sizeof(struct nft_ct_timeout_obj),
+ .eval = nft_ct_timeout_obj_eval,
+ .init = nft_ct_timeout_obj_init,
+ .destroy = nft_ct_timeout_obj_destroy,
+ .dump = nft_ct_timeout_obj_dump,
+};
+
+static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_TIMEOUT,
+ .ops = &nft_ct_timeout_obj_ops,
+ .maxattr = NFTA_CT_TIMEOUT_MAX,
+ .policy = nft_ct_timeout_policy,
+ .owner = THIS_MODULE,
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_helper *help4, *help6;
+ char name[NF_CT_HELPER_NAME_LEN];
+ int family = ctx->family;
+ int err;
+
+ if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+ return -EINVAL;
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+ if (!priv->l4proto)
+ return -ENOENT;
+
+ nla_strscpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+ if (tb[NFTA_CT_HELPER_L3PROTO])
+ family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+ help4 = NULL;
+ help6 = NULL;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (ctx->family == NFPROTO_IPV6)
+ return -EINVAL;
+
+ help4 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_IPV6:
+ if (ctx->family == NFPROTO_IPV4)
+ return -EINVAL;
+
+ help6 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_NETDEV:
+ case NFPROTO_BRIDGE:
+ case NFPROTO_INET:
+ help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+ priv->l4proto);
+ help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+ priv->l4proto);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+	/* && is intentional; only error if INET found neither ipv4 nor ipv6 */
+ if (!help4 && !help6)
+ return -ENOENT;
+
+ priv->helper4 = help4;
+ priv->helper6 = help6;
+
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_put_helper;
+
+ return 0;
+
+err_put_helper:
+ if (priv->helper4)
+ nf_conntrack_helper_put(priv->helper4);
+ if (priv->helper6)
+ nf_conntrack_helper_put(priv->helper6);
+ return err;
+}
+
+static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+ if (priv->helper4)
+ nf_conntrack_helper_put(priv->helper4);
+ if (priv->helper6)
+ nf_conntrack_helper_put(priv->helper6);
+
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conntrack_helper *to_assign = NULL;
+ struct nf_conn_help *help;
+
+ if (!ct ||
+ nf_ct_is_confirmed(ct) ||
+ nf_ct_is_template(ct) ||
+ priv->l4proto != nf_ct_protonum(ct))
+ return;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ to_assign = priv->helper4;
+ break;
+ case NFPROTO_IPV6:
+ to_assign = priv->helper6;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (!to_assign)
+ return;
+
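+	/* Don't override a helper that was already assigned. */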
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return;
+
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help) {
+ rcu_assign_pointer(help->helper, to_assign);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+}
+
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ const struct nf_conntrack_helper *helper;
+ u16 family;
+
+ if (priv->helper4 && priv->helper6) {
+ family = NFPROTO_INET;
+ helper = priv->helper4;
+ } else if (priv->helper6) {
+ family = NFPROTO_IPV6;
+ helper = priv->helper6;
+ } else {
+ family = NFPROTO_IPV4;
+ helper = priv->helper4;
+ }
+
+ if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+ return -1;
+
+ if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+ return -1;
+
+ if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+ return -1;
+
+ return 0;
+}
+
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+ [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+ .type = &nft_ct_helper_obj_type,
+ .size = sizeof(struct nft_ct_helper_obj),
+ .eval = nft_ct_helper_obj_eval,
+ .init = nft_ct_helper_obj_init,
+ .destroy = nft_ct_helper_obj_destroy,
+ .dump = nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .ops = &nft_ct_helper_obj_ops,
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
+ .owner = THIS_MODULE,
+};
+
+struct nft_ct_expect_obj {
+ u16 l3num;
+ __be16 dport;
+ u8 l4proto;
+ u8 size;
+ u32 timeout;
+};
+
+static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+
+ if (!tb[NFTA_CT_EXPECT_L4PROTO] ||
+ !tb[NFTA_CT_EXPECT_DPORT] ||
+ !tb[NFTA_CT_EXPECT_TIMEOUT] ||
+ !tb[NFTA_CT_EXPECT_SIZE])
+ return -EINVAL;
+
+ priv->l3num = ctx->family;
+ if (tb[NFTA_CT_EXPECT_L3PROTO])
+ priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
+ priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
+ priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_ct_expect_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_ct_expect_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+
+ if (nla_put_be16(skb, NFTA_CT_EXPECT_L3PROTO, htons(priv->l3num)) ||
+ nla_put_u8(skb, NFTA_CT_EXPECT_L4PROTO, priv->l4proto) ||
+ nla_put_be16(skb, NFTA_CT_EXPECT_DPORT, priv->dport) ||
+ nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, priv->timeout) ||
+ nla_put_u8(skb, NFTA_CT_EXPECT_SIZE, priv->size))
+ return -1;
+
+ return 0;
+}
+
+static void nft_ct_expect_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_expect *exp;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_help *help;
+ enum ip_conntrack_dir dir;
+ u16 l3num = priv->l3num;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ dir = CTINFO2DIR(ctinfo);
+
+ help = nfct_help(ct);
+ if (!help)
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (!help) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ if (help->expecting[NF_CT_EXPECT_CLASS_DEFAULT] >= priv->size) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ if (l3num == NFPROTO_INET)
+ l3num = nf_ct_l3num(ct);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
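+	/* Build the expectation from the opposite direction's tuple (!dir),
+	 * matching only the configured destination port.
+	 */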
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ priv->l4proto, NULL, &priv->dport);
+ exp->timeout.expires = jiffies + priv->timeout * HZ;
+
+ if (nf_ct_expect_related(exp, 0) != 0)
+ regs->verdict.code = NF_DROP;
+}
+
+static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = {
+ [NFTA_CT_EXPECT_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_EXPECT_L4PROTO] = { .type = NLA_U8 },
+ [NFTA_CT_EXPECT_DPORT] = { .type = NLA_U16 },
+ [NFTA_CT_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [NFTA_CT_EXPECT_SIZE] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_expect_obj_type;
+
+static const struct nft_object_ops nft_ct_expect_obj_ops = {
+ .type = &nft_ct_expect_obj_type,
+ .size = sizeof(struct nft_ct_expect_obj),
+ .eval = nft_ct_expect_obj_eval,
+ .init = nft_ct_expect_obj_init,
+ .destroy = nft_ct_expect_obj_destroy,
+ .dump = nft_ct_expect_obj_dump,
+};
+
+static struct nft_object_type nft_ct_expect_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_EXPECT,
+ .ops = &nft_ct_expect_obj_ops,
+ .maxattr = NFTA_CT_EXPECT_MAX,
+ .policy = nft_ct_expect_policy,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_ct_module_init(void)
+{
+ int err;
+
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
+
+ err = nft_register_expr(&nft_ct_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_notrack_type);
+ if (err < 0)
+ goto err1;
+
+ err = nft_register_obj(&nft_ct_helper_obj_type);
+ if (err < 0)
+ goto err2;
+
+ err = nft_register_obj(&nft_ct_expect_obj_type);
+ if (err < 0)
+ goto err3;
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ err = nft_register_obj(&nft_ct_timeout_obj_type);
+ if (err < 0)
+ goto err4;
+#endif
+ return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+err4:
+ nft_unregister_obj(&nft_ct_expect_obj_type);
+#endif
+err3:
+ nft_unregister_obj(&nft_ct_helper_obj_type);
+err2:
+ nft_unregister_expr(&nft_notrack_type);
+err1:
+ nft_unregister_expr(&nft_ct_type);
+ return err;
+}
+
+static void __exit nft_ct_module_exit(void)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ nft_unregister_obj(&nft_ct_timeout_obj_type);
+#endif
+ nft_unregister_obj(&nft_ct_expect_obj_type);
+ nft_unregister_obj(&nft_ct_helper_obj_type);
+ nft_unregister_expr(&nft_notrack_type);
+ nft_unregister_expr(&nft_ct_type);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
+MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT);
+MODULE_DESCRIPTION("Netfilter nf_tables conntrack module");
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
new file mode 100644
index 000000000..635074027
--- /dev/null
+++ b/net/netfilter/nft_dup_netdev.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+struct nft_dup_netdev {
+ u8 sreg_dev;
+};
+
+static void nft_dup_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_netdev_egress(pkt, oif);
+}
+
+static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static int nft_dup_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_DUP_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
+}
+
+static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_dup_netdev *priv = nft_expr_priv(expr);
+ int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
+}
+
+static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
+static struct nft_expr_type nft_dup_netdev_type;
+static const struct nft_expr_ops nft_dup_netdev_ops = {
+ .type = &nft_dup_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)),
+ .eval = nft_dup_netdev_eval,
+ .init = nft_dup_netdev_init,
+ .dump = nft_dup_netdev_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_dup_netdev_offload,
+ .offload_action = nft_dup_netdev_offload_action,
+};
+
+static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "dup",
+ .ops = &nft_dup_netdev_ops,
+ .policy = nft_dup_netdev_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_dup_netdev_type);
+}
+
+static void __exit nft_dup_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_netdev_type);
+}
+
+module_init(nft_dup_netdev_module_init);
+module_exit(nft_dup_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "dup");
+MODULE_DESCRIPTION("nftables netdev packet duplication support");
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 000000000..a470e5f61
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_dynset {
+ struct nft_set *set;
+ struct nft_set_ext_tmpl tmpl;
+ enum nft_dynset_ops op:8;
+ u8 sreg_key;
+ u8 sreg_data;
+ bool invert;
+ bool expr;
+ u8 num_exprs;
+ u64 timeout;
+ struct nft_expr *expr_array[NFT_SET_EXPR_MAX];
+ struct nft_set_binding binding;
+};
+
+static int nft_dynset_expr_setup(const struct nft_dynset *priv,
+ const struct nft_set_ext *ext)
+{
+ struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext);
+ struct nft_expr *expr;
+ int i;
+
+ for (i = 0; i < priv->num_exprs; i++) {
+ expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
+ if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ return -1;
+
+ elem_expr->size += priv->expr_array[i]->ops->size;
+ }
+
+ return 0;
+}
+
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set_ext *ext;
+ u64 timeout;
+ void *elem;
+
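+	/* Reserve a slot up front; this enforces the set's size limit
+	 * locklessly from the packet path.
+	 */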
+ if (!atomic_add_unless(&set->nelems, 1, set->size))
+ return NULL;
+
+ timeout = priv->timeout ? : set->timeout;
+ elem = nft_set_elem_init(set, &priv->tmpl,
+ &regs->data[priv->sreg_key], NULL,
+ &regs->data[priv->sreg_data],
+ timeout, 0, GFP_ATOMIC);
+ if (IS_ERR(elem))
+ goto err1;
+
+ ext = nft_set_elem_ext(set, elem);
+ if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0)
+ goto err2;
+
+ return elem;
+
+err2:
+ nft_set_elem_destroy(set, elem, false);
+err1:
+ if (set->size)
+ atomic_dec(&set->nelems);
+ return NULL;
+}
+
+void nft_dynset_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
+ u64 timeout;
+
+ if (priv->op == NFT_DYNSET_OP_DELETE) {
+ set->ops->delete(set, &regs->data[priv->sreg_key]);
+ return;
+ }
+
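+	/* ->update() looks up the key and, for the add/update ops, inserts
+	 * a fresh element via nft_dynset_new() if none exists yet.
+	 */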
+ if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
+ expr, regs, &ext)) {
+ if (priv->op == NFT_DYNSET_OP_UPDATE &&
+ nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ timeout = priv->timeout ? : set->timeout;
+ *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
+ }
+
+ nft_set_elem_update_expr(ext, regs, pkt);
+
+ if (priv->invert)
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ if (!priv->invert)
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_dynset_ext_add_expr(struct nft_dynset *priv)
+{
+ u8 size = 0;
+ int i;
+
+ for (i = 0; i < priv->num_exprs; i++)
+ size += priv->expr_array[i]->ops->size;
+
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPRESSIONS,
+ sizeof(struct nft_set_elem_expr) + size);
+}
+
+static struct nft_expr *
+nft_dynset_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set,
+ const struct nlattr *attr, int pos)
+{
+ struct nft_expr *expr;
+ int err;
+
+ expr = nft_set_elem_expr_alloc(ctx, set, attr);
+ if (IS_ERR(expr))
+ return expr;
+
+ if (set->exprs[pos] && set->exprs[pos]->ops != expr->ops) {
+ err = -EOPNOTSUPP;
+ goto err_dynset_expr;
+ }
+
+ return expr;
+
+err_dynset_expr:
+ nft_expr_destroy(ctx, expr);
+ return ERR_PTR(err);
+}
+
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
+ [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED },
+ [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 },
+ [NFTA_DYNSET_EXPRESSIONS] = { .type = NLA_NESTED },
+};
+
+static int nft_dynset_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_dynset *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set *set;
+ u64 timeout;
+ int err, i;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
+
+ if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+ tb[NFTA_DYNSET_OP] == NULL ||
+ tb[NFTA_DYNSET_SREG_KEY] == NULL)
+ return -EINVAL;
+
+ if (tb[NFTA_DYNSET_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+ if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR))
+ return -EOPNOTSUPP;
+ if (flags & NFT_DYNSET_F_INV)
+ priv->invert = true;
+ if (flags & NFT_DYNSET_F_EXPR)
+ priv->expr = true;
+ }
+
+ set = nft_set_lookup_global(ctx->net, ctx->table,
+ tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (set->flags & NFT_SET_OBJECT)
+ return -EOPNOTSUPP;
+
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
+ if (set->flags & NFT_SET_CONSTANT)
+ return -EBUSY;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+ if (priv->op > NFT_DYNSET_OP_DELETE)
+ return -EOPNOTSUPP;
+
+ timeout = 0;
+ if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+
+ err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
+ if (err)
+ return err;
+ }
+
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ set->klen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+ if (!(set->flags & NFT_SET_MAP))
+ return -EOPNOTSUPP;
+ if (set->dtype == NFT_DATA_VERDICT)
+ return -EOPNOTSUPP;
+
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ &priv->sreg_data, set->dlen);
+ if (err < 0)
+ return err;
+ } else if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ if ((tb[NFTA_DYNSET_EXPR] || tb[NFTA_DYNSET_EXPRESSIONS]) &&
+ !(set->flags & NFT_SET_EVAL))
+ return -EINVAL;
+
+ if (tb[NFTA_DYNSET_EXPR]) {
+ struct nft_expr *dynset_expr;
+
+ dynset_expr = nft_dynset_expr_alloc(ctx, set,
+ tb[NFTA_DYNSET_EXPR], 0);
+ if (IS_ERR(dynset_expr))
+ return PTR_ERR(dynset_expr);
+
+ priv->num_exprs++;
+ priv->expr_array[0] = dynset_expr;
+
+ if (set->num_exprs > 1 ||
+ (set->num_exprs == 1 &&
+ dynset_expr->ops != set->exprs[0]->ops)) {
+ err = -EOPNOTSUPP;
+ goto err_expr_free;
+ }
+ } else if (tb[NFTA_DYNSET_EXPRESSIONS]) {
+ struct nft_expr *dynset_expr;
+ struct nlattr *tmp;
+ int left;
+
+ if (!priv->expr)
+ return -EINVAL;
+
+ i = 0;
+ nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_expr_free;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_expr_free;
+ }
+ dynset_expr = nft_dynset_expr_alloc(ctx, set, tmp, i);
+ if (IS_ERR(dynset_expr)) {
+ err = PTR_ERR(dynset_expr);
+ goto err_expr_free;
+ }
+ priv->expr_array[i] = dynset_expr;
+ priv->num_exprs++;
+
+ if (set->num_exprs) {
+ if (i >= set->num_exprs) {
+ err = -EINVAL;
+ goto err_expr_free;
+ }
+ if (dynset_expr->ops != set->exprs[i]->ops) {
+ err = -EOPNOTSUPP;
+ goto err_expr_free;
+ }
+ }
+ i++;
+ }
+ if (set->num_exprs && set->num_exprs != i) {
+ err = -EOPNOTSUPP;
+ goto err_expr_free;
+ }
+ } else if (set->num_exprs > 0) {
+ err = nft_set_elem_expr_clone(ctx, set, priv->expr_array);
+ if (err < 0)
+ return err;
+
+ priv->num_exprs = set->num_exprs;
+ }
+
+ nft_set_ext_prepare(&priv->tmpl);
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (set->flags & NFT_SET_MAP)
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+
+ if (priv->num_exprs)
+ nft_dynset_ext_add_expr(priv);
+
+ if (set->flags & NFT_SET_TIMEOUT) {
+ if (timeout || set->timeout) {
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
+ }
+
+ priv->timeout = timeout;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ goto err_expr_free;
+
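+	/* Give unbounded sets a default limit of 0xffff elements. */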
+ if (set->size == 0)
+ set->size = 0xffff;
+
+ priv->set = set;
+ return 0;
+
+err_expr_free:
+ for (i = 0; i < priv->num_exprs; i++)
+ nft_expr_destroy(ctx, priv->expr_array[i]);
+ return err;
+}
+
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_activate_set(ctx, priv->set);
+}
+
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+ int i;
+
+ for (i = 0; i < priv->num_exprs; i++)
+ nft_expr_destroy(ctx, priv->expr_array[i]);
+
+ nf_tables_destroy_set(ctx, priv->set);
+}
+
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
+ int i;
+
+ if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
+ goto nla_put_failure;
+ if (priv->set->flags & NFT_SET_MAP &&
+ nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, priv->sreg_data))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+ nf_jiffies64_to_msecs(priv->timeout),
+ NFTA_DYNSET_PAD))
+ goto nla_put_failure;
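+	/* Expressions inherited from the set definition are dumped with the
+	 * set itself; only dump expressions attached by this rule.
+	 */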
+ if (priv->set->num_exprs == 0) {
+ if (priv->num_exprs == 1) {
+ if (nft_expr_dump(skb, NFTA_DYNSET_EXPR,
+ priv->expr_array[0]))
+ goto nla_put_failure;
+ } else if (priv->num_exprs > 1) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < priv->num_exprs; i++) {
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM,
+ priv->expr_array[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+ }
+ if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_dynset_ops = {
+ .type = &nft_dynset_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+ .eval = nft_dynset_eval,
+ .init = nft_dynset_init,
+ .destroy = nft_dynset_destroy,
+ .activate = nft_dynset_activate,
+ .deactivate = nft_dynset_deactivate,
+ .dump = nft_dynset_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+struct nft_expr_type nft_dynset_type __read_mostly = {
+ .name = "dynset",
+ .ops = &nft_dynset_ops,
+ .policy = nft_dynset_policy,
+ .maxattr = NFTA_DYNSET_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 000000000..de588f7b6
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/sctp/sctp.h>
+#include <net/tcp.h>
+
+struct nft_exthdr {
+ u8 type;
+ u8 offset;
+ u8 len;
+ u8 op;
+ u8 dreg;
+ u8 sreg;
+ u8 flags;
+};
+
+static unsigned int optlen(const u8 *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
+ return 1;
+ else
+ return opt[offset + 1];
+}
+
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
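+	/* Zero the final register word before a partial-length copy so no
+	 * stale data is left in the unwritten tail.
+	 */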
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
+static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ unsigned int offset = 0;
+ int err;
+
+ if (pkt->skb->protocol != htons(ETH_P_IPV6))
+ goto err;
+
+ err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ nft_reg_store8(dest, err >= 0);
+ return;
+ } else if (err < 0) {
+ goto err;
+ }
+ offset += priv->offset;
+
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+ goto err;
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+/* Find the offset of the specified IPv4 option.
+ *
+ * If the target option is found, its offset is stored in *offset and the
+ * option number is returned. Otherwise, a negative error is returned.
+ *
+ * If the first fragment doesn't contain the End of Options it is considered
+ * invalid.
+ */
+static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+ unsigned int *offset, int target)
+{
+ unsigned char optbuf[sizeof(struct ip_options) + 40];
+ struct ip_options *opt = (struct ip_options *)optbuf;
+ struct iphdr *iph, _iph;
+ unsigned int start;
+ bool found = false;
+ __be32 info;
+ int optlen;
+
+ iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (!iph)
+ return -EBADMSG;
+ start = sizeof(struct iphdr);
+
+ optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
+ if (optlen <= 0)
+ return -ENOENT;
+
+ memset(opt, 0, sizeof(struct ip_options));
+	/* Copy the options since __ip_options_compile() modifies them. */
+ if (skb_copy_bits(skb, start, opt->__data, optlen))
+ return -EBADMSG;
+ opt->optlen = optlen;
+
+ if (__ip_options_compile(net, opt, NULL, &info))
+ return -EBADMSG;
+
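+	/* SSRR and LSRR share opt->srr; opt->is_strictroute tells them
+	 * apart.
+	 */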
+ switch (target) {
+ case IPOPT_SSRR:
+ case IPOPT_LSRR:
+ if (!opt->srr)
+ break;
+ found = target == IPOPT_SSRR ? opt->is_strictroute :
+ !opt->is_strictroute;
+ if (found)
+ *offset = opt->srr + start;
+ break;
+ case IPOPT_RR:
+ if (!opt->rr)
+ break;
+ *offset = opt->rr + start;
+ found = true;
+ break;
+ case IPOPT_RA:
+ if (!opt->router_alert)
+ break;
+ *offset = opt->router_alert + start;
+ found = true;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return found ? target : -ENOENT;
+}
+
+static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ struct sk_buff *skb = pkt->skb;
+ unsigned int offset;
+ int err;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ goto err;
+
+ err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ nft_reg_store8(dest, err >= 0);
+ return;
+ } else if (err < 0) {
+ goto err;
+ }
+ offset += priv->offset;
+
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+ goto err;
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+ struct tcphdr *tcph;
+
+ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
+ if (!tcph)
+ return NULL;
+
+ *tcphdr_len = __tcp_hdrlen(tcph);
+ if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+ return NULL;
+
+ return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
+}
+
+static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ u32 *dest = &regs->data[priv->dreg];
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ goto err;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ goto err;
+
+ offset = i + priv->offset;
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ nft_reg_store8(dest, 1);
+ } else {
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
+
+ return;
+ }
+
+err:
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ *dest = 0;
+ else
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ goto err;
+
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto err;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ union {
+ __be16 v16;
+ __be32 v32;
+ } old, new;
+
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ goto err;
+
+ offset = i + priv->offset;
+
+ switch (priv->len) {
+ case 2:
+ old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset));
+ new.v16 = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg]);
+
+ switch (priv->type) {
+ case TCPOPT_MSS:
+				/* an MSS increase can cause the connection to stall */
+ if (ntohs(old.v16) <= ntohs(new.v16))
+ return;
+ break;
+ }
+
+ if (old.v16 == new.v16)
+ return;
+
+ put_unaligned(new.v16, (__be16*)(opt + offset));
+ inet_proto_csum_replace2(&tcph->check, pkt->skb,
+ old.v16, new.v16, false);
+ break;
+ case 4:
+ new.v32 = nft_reg_load_be32(&regs->data[priv->sreg]);
+ old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset));
+
+ if (old.v32 == new.v32)
+ return;
+
+ put_unaligned(new.v32, (__be32*)(opt + offset));
+ inet_proto_csum_replace4(&tcph->check, pkt->skb,
+ old.v32, new.v32, false);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return;
+ }
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, tcphdr_len, optl;
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ goto err;
+
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto drop;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ unsigned int j;
+
+ optl = optlen(opt, i);
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len)
+ goto drop;
+
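+		/* Overwrite the option with NOPs one byte at a time, patching
+		 * the checksum as we go; the shift keeps each byte in its slot
+		 * within the 16-bit checksum word.
+		 */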
+ for (j = 0; j < optl; ++j) {
+ u16 n = TCPOPT_NOP;
+ u16 o = opt[i+j];
+
+ if ((i + j) % 2 == 0) {
+ o <<= 8;
+ n <<= 8;
+ }
+ inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
+ htons(n), false);
+ }
+ memset(opt + i, TCPOPT_NOP, optl);
+ return;
+ }
+
+	/* option not found, continue. This allows multiple option
+	 * removals per rule.
+	 */
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+ return;
+drop:
+ /* can't remove, no choice but to drop */
+ regs->verdict.code = NF_DROP;
+}
+
+static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+
+ if (pkt->tprot != IPPROTO_SCTP)
+ goto err;
+
+ do {
+ sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
+ if (!sch || !sch->length)
+ break;
+
+ if (sch->type == priv->type) {
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ nft_reg_store8(dest, true);
+ return;
+ }
+ if (priv->offset + priv->len > ntohs(sch->length) ||
+ offset + ntohs(sch->length) > pkt->skb->len)
+ break;
+
+ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
+ break;
+ return;
+ }
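+		/* Chunks are 32-bit aligned; skip over any padding. */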
+ offset += SCTP_PAD4(ntohs(sch->length));
+ } while (offset < pkt->skb->len);
+err:
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ nft_reg_store8(dest, false);
+ else
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int thoff, dataoff, optoff, optlen, i;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dccp_hdr *dh;
+ struct dccp_hdr _dh;
+
+ if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
+ goto err;
+
+ thoff = nft_thoff(pkt);
+
+ dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
+ if (!dh)
+ goto err;
+
+ dataoff = dh->dccph_doff * sizeof(u32);
+ optoff = __dccp_hdr_len(dh);
+ if (dataoff <= optoff)
+ goto err;
+
+ optlen = dataoff - optoff;
+
+ for (i = 0; i < optlen; ) {
+		/* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B
+		 * long; the remaining options are at least 2B long. In
+ * all cases, the first byte contains the option type. In
+ * multi-byte options, the second byte contains the option
+ * length, which must be at least two: 1 for the type plus 1 for
+ * the length plus 0-253 for any following option data. We
+ * aren't interested in the option data, only the type and the
+ * length, so we don't need to read more than two bytes at a
+ * time.
+ */
+ unsigned int buflen = optlen - i;
+ u8 buf[2], *bufp;
+ u8 type, len;
+
+ if (buflen > sizeof(buf))
+ buflen = sizeof(buf);
+
+ bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
+ &buf);
+ if (!bufp)
+ goto err;
+
+ type = bufp[0];
+
+ if (type == priv->type) {
+ nft_reg_store8(dest, 1);
+ return;
+ }
+
+ if (type <= DCCPO_MAX_RESERVED) {
+ i++;
+ continue;
+ }
+
+ if (buflen < 2)
+ goto err;
+
+ len = bufp[1];
+
+ if (len < 2)
+ goto err;
+
+ i += len;
+ }
+
+err:
+ *dest = 0;
+}
+
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+ [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
+ [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
+};
+
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
+
+ if (!tb[NFTA_EXTHDR_DREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_EXTHDR_FLAGS]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
+ if (err < 0)
+ return err;
+
+ if (flags & ~NFT_EXTHDR_F_PRESENT)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_EXTHDR_OP]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+ }
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
+}
+
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
+
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
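+	/* Offsets 0 and 1 would overwrite the option's kind and length
+	 * bytes.
+	 */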
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ priv->len);
+}
+
+static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_EXTHDR_SREG] ||
+ tb[NFTA_EXTHDR_DREG] ||
+ tb[NFTA_EXTHDR_FLAGS] ||
+ tb[NFTA_EXTHDR_OFFSET] ||
+ tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (!tb[NFTA_EXTHDR_TYPE])
+ return -EINVAL;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->op = NFT_EXTHDR_OP_TCPOPT;
+
+ return 0;
+}
+
+static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err = nft_exthdr_init(ctx, expr, tb);
+
+ if (err < 0)
+ return err;
+
+ switch (priv->type) {
+ case IPOPT_SSRR:
+ case IPOPT_LSRR:
+ case IPOPT_RR:
+ case IPOPT_RA:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err = nft_exthdr_init(ctx, expr, tb);
+
+ if (err < 0)
+ return err;
+
+ if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
+ if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static bool nft_exthdr_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+ const struct nft_exthdr *exthdr;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ exthdr = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->type != exthdr->type ||
+ priv->op != exthdr->op ||
+ priv->flags != exthdr->flags ||
+ priv->offset != exthdr->offset ||
+ priv->len != exthdr->len) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_ipv6_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
+static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_ipv4_eval,
+ .init = nft_exthdr_ipv4_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
+static const struct nft_expr_ops nft_exthdr_tcp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_set_eval,
+ .init = nft_exthdr_tcp_set_init,
+ .dump = nft_exthdr_dump_set,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_strip_eval,
+ .init = nft_exthdr_tcp_strip_init,
+ .dump = nft_exthdr_dump_strip,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops nft_exthdr_sctp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_sctp_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
+static const struct nft_expr_ops nft_exthdr_dccp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_dccp_eval,
+ .init = nft_exthdr_dccp_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
+static const struct nft_expr_ops *
+nft_exthdr_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 op;
+
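+	/* No OP attribute: default to IPv6 extension header matching. */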
+ if (!tb[NFTA_EXTHDR_OP])
+ return &nft_exthdr_ipv6_ops;
+
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
+ switch (op) {
+ case NFT_EXTHDR_OP_TCPOPT:
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ return &nft_exthdr_tcp_strip_ops;
+ case NFT_EXTHDR_OP_IPV6:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
+ case NFT_EXTHDR_OP_IPV4:
+ if (ctx->family != NFPROTO_IPV6) {
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv4_ops;
+ }
+ break;
+ case NFT_EXTHDR_OP_SCTP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_sctp_ops;
+ break;
+ case NFT_EXTHDR_OP_DCCP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_dccp_ops;
+ break;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+struct nft_expr_type nft_exthdr_type __read_mostly = {
+ .name = "exthdr",
+ .select_ops = nft_exthdr_select_ops,
+ .policy = nft_exthdr_policy,
+ .maxattr = NFTA_EXTHDR_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644
index 000000000..5748415f7
--- /dev/null
+++ b/net/netfilter/nft_fib.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
+ [NFTA_FIB_DREG] = { .type = NLA_U32 },
+ [NFTA_FIB_RESULT] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(nft_fib_policy);
+
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
+
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ hooks = (1 << NF_INET_PRE_ROUTING);
+ if (priv->flags & NFTA_FIB_F_IIF) {
+ hooks |= (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ }
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ if (priv->flags & NFTA_FIB_F_IIF)
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ else
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+EXPORT_SYMBOL_GPL(nft_fib_validate);
+
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
+ return -EINVAL;
+
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
+
+ if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ return -EINVAL;
+
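+	/* saddr/daddr and iif/oif are mutually exclusive pairs; at least
+	 * one address flag is required.
+	 */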
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
+ (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
+ (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
+ return -EINVAL;
+
+ priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = sizeof(int);
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = IFNAMSIZ;
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_init);
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_dump);
+
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
+ const struct net_device *dev)
+{
+ u32 *dreg = reg;
+ int index;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ index = dev ? dev->ifindex : 0;
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ nft_reg_store8(dreg, !!index);
+ else
+ *dreg = index;
+
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ nft_reg_store8(dreg, !!dev);
+ else
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ *dreg = 0;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_fib_store_result);
+
+bool nft_fib_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int len = NFT_REG32_SIZE;
+ const struct nft_fib *fib;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ len = NFT_REG32_SIZE;
+ else
+ len = IFNAMSIZ;
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, len);
+ return false;
+ }
+
+ fib = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->result != fib->result ||
+ priv->flags != fib->flags) {
+ nft_reg_track_update(track, expr, priv->dreg, len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(nft_fib_reduce);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644
index 000000000..666a3741d
--- /dev/null
+++ b/net/netfilter/nft_fib_inet.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case NFPROTO_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NF_DROP;
+}
+
+static struct nft_expr_type nft_fib_inet_type;
+static const struct nft_expr_ops nft_fib_inet_ops = {
+ .type = &nft_fib_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_inet_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
+};
+
+static struct nft_expr_type nft_fib_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "fib",
+ .ops = &nft_fib_inet_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_inet_module_init(void)
+{
+ return nft_register_expr(&nft_fib_inet_type);
+}
+
+static void __exit nft_fib_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_inet_type);
+}
+
+module_init(nft_fib_inet_module_init);
+module_exit(nft_fib_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
+MODULE_DESCRIPTION("nftables fib inet support");
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000..9121ec64e
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nft_fib.h>
+
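+/* netdev hooks run at layer 2, so dispatch on the ethertype rather than
+ * the netfilter family. IPv6 lookups are attempted only when the ipv6
+ * module is loaded; anything unhandled falls through to NFT_BREAK.
+ */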
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (ntohs(pkt->skb->protocol)) {
+ case ETH_P_IP:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case ETH_P_IPV6:
+ if (!ipv6_mod_enabled())
+ break;
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+ .type = &nft_fib_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_netdev_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fib",
+ .ops = &nft_fib_netdev_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
+MODULE_DESCRIPTION("nftables netdev fib lookups support");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
new file mode 100644
index 000000000..3d9f6dda5
--- /dev/null
+++ b/net/netfilter/nft_flow_offload.c
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ip.h> /* for ipv4 options. */
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct nft_flow_offload {
+ struct nft_flowtable *flowtable;
+};
+
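+/* Flows whose route carries an IPsec transform must leave through the
+ * xfrm stack; everything else transmits via the neighbour layer.
+ */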
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
+{
+ if (dst_xfrm(dst))
+ return FLOW_OFFLOAD_XMIT_XFRM;
+
+ return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nft_default_forward_path(struct nf_flow_route *route,
+ struct dst_entry *dst_cache,
+ enum ip_conntrack_dir dir)
+{
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
+ route->tuple[dir].dst = dst_cache;
+ route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
+}
+
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
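+/* Resolve the next-hop MAC via the neighbour cache, then let the device
+ * describe its forwarding path (bridge, VLAN, PPPoE, DSA). A neighbour
+ * entry that is not NUD_VALID fails the lookup, leaving the flow on the
+ * default neighbour transmit path; non-ethernet devices skip straight
+ * to the path walk.
+ */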
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+ const struct dst_entry *dst_cache,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, u8 *ha,
+ struct net_device_path_stack *stack)
+{
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+ struct net_device *dev = dst_cache->dev;
+ struct neighbour *n;
+ u8 nud_state;
+
+ if (!nft_is_valid_ether_device(dev))
+ goto out;
+
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -1;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -1;
+
+out:
+ return dev_fill_forward_path(dev, ha, stack);
+}
+
+struct nft_forward_info {
+ const struct net_device *indev;
+ const struct net_device *outdev;
+ const struct net_device *hw_outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
+ u8 ingress_vlans;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ enum flow_offload_xmit_type xmit_type;
+};
+
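+/* Walk the forwarding-path stack from the hook device towards the real
+ * ethernet device, recording up to NF_FLOW_TABLE_ENCAP_MAX VLAN/PPPoE
+ * encapsulations along the way. A DSA hop ends the walk early, and an
+ * unrecognised hop clears info->indev, which disables the direct xmit
+ * path for this flow.
+ */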
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
+ struct nft_forward_info *info,
+ unsigned char *ha, struct nf_flowtable *flowtable)
+{
+ const struct net_device_path *path;
+ int i;
+
+ memcpy(info->h_dest, ha, ETH_ALEN);
+
+ for (i = 0; i < stack->num_paths; i++) {
+ path = &stack->path[i];
+ switch (path->type) {
+ case DEV_PATH_ETHERNET:
+ case DEV_PATH_DSA:
+ case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
+ info->indev = path->dev;
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+ if (path->type == DEV_PATH_DSA) {
+ i = stack->num_paths;
+ break;
+ }
+
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ if (!info->outdev)
+ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ break;
+ case DEV_PATH_BRIDGE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ switch (path->bridge.vlan_mode) {
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
+ break;
+ case DEV_PATH_BR_VLAN_TAG:
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+ info->num_encaps++;
+ break;
+ case DEV_PATH_BR_VLAN_UNTAG:
+ info->num_encaps--;
+ break;
+ case DEV_PATH_BR_VLAN_KEEP:
+ break;
+ }
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+ break;
+ default:
+ info->indev = NULL;
+ break;
+ }
+ }
+ if (!info->outdev)
+ info->outdev = info->indev;
+
+ info->hw_outdev = info->indev;
+
+ if (nf_flowtable_hw_offload(flowtable) &&
+ nft_is_valid_ether_device(info->indev))
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+}
+
+static bool nft_flowtable_find_dev(const struct net_device *dev,
+ struct nft_flowtable *ft)
+{
+ struct nft_hook *hook;
+ bool found = false;
+
+ list_for_each_entry_rcu(hook, &ft->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ found = true;
+ break;
+ }
+
+ return found;
+}
+
+static void nft_dev_forward_path(struct nf_flow_route *route,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft)
+{
+ const struct dst_entry *dst = route->tuple[dir].dst;
+ struct net_device_path_stack stack;
+ struct nft_forward_info info = {};
+ unsigned char ha[ETH_ALEN];
+ int i;
+
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+ nft_dev_path_info(&stack, &info, ha, &ft->data);
+
+ if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
+ return;
+
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
+ route->tuple[dir].xmit_type = info.xmit_type;
+ }
+}
+
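+/* Fill in both directions of the flow route: the original direction
+ * reuses the dst already attached to the skb, while the reply direction
+ * needs a fresh fib lookup with source and destination swapped. The
+ * detailed forward path is only expanded when both directions would
+ * otherwise use plain neighbour transmission.
+ */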
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft)
+{
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
+ fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
+ fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_mark = pkt->skb->mark;
+ fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
+ fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
+ fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
+ fl.u.ip6.flowi6_mark = pkt->skb->mark;
+ fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ }
+
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
+ if (!other_dst)
+ return -ENOENT;
+
+ nft_default_forward_path(route, this_dst, dir);
+ nft_default_forward_path(route, other_dst, !dir);
+
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+ nft_dev_forward_path(route, ct, dir, ft);
+ nft_dev_forward_path(route, ct, !dir, ft);
+ }
+
+ return 0;
+}
+
+static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
+{
+ if (skb_sec_path(skb))
+ return true;
+
+ if (family == NFPROTO_IPV4) {
+ const struct ip_options *opt;
+
+ opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ }
+
+ return false;
+}
+
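+/* Gatekeeper for offloading a conntrack entry: the connection must be
+ * confirmed (and, for TCP, established and not closing), must not rely
+ * on a helper, sequence adjustment or a clashing NAT entry, and exactly
+ * one evaluation wins the race to set IPS_OFFLOAD_BIT. Any failure
+ * falls through to NFT_BREAK so classic forwarding handles the packet.
+ */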
+static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ struct nf_flowtable *flowtable = &priv->flowtable->data;
+ struct tcphdr _tcph, *tcph = NULL;
+ struct nf_flow_route route = {};
+ enum ip_conntrack_info ctinfo;
+ struct flow_offload *flow;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
+ int ret;
+
+ if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
+ goto out;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct)
+ goto out;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
+ sizeof(_tcph), &_tcph);
+ if (unlikely(!tcph || tcph->fin || tcph->rst ||
+ !nf_conntrack_tcp_established(ct)))
+ goto out;
+ break;
+ case IPPROTO_UDP:
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: {
+ struct nf_conntrack_tuple *tuple;
+
+ if (ct->status & IPS_NAT_MASK)
+ goto out;
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ /* No support for GRE v1 */
+ if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+ goto out;
+ break;
+ }
+#endif
+ default:
+ goto out;
+ }
+
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH))
+ goto out;
+
+ if (!nf_ct_is_confirmed(ct))
+ goto out;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ goto out;
+
+ dir = CTINFO2DIR(ctinfo);
+ if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct);
+ if (!flow)
+ goto err_flow_alloc;
+
+ if (flow_offload_route_init(flow, &route) < 0)
+ goto err_flow_add;
+
+ if (tcph) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ ret = flow_offload_add(flowtable, flow);
+ if (ret < 0)
+ goto err_flow_add;
+
+ dst_release(route.tuple[!dir].dst);
+ return;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+out:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_flow_offload_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ unsigned int hook_mask = (1 << NF_INET_FORWARD);
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ return nft_chain_validate_hooks(ctx->chain, hook_mask);
+}
+
+static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = {
+ [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+};
+
+static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_flowtable *flowtable;
+
+ if (!tb[NFTA_FLOW_TABLE_NAME])
+ return -EINVAL;
+
+ flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
+ priv->flowtable = flowtable;
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ nft_use_inc_restore(&priv->flowtable->use);
+}
+
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_flow_offload_type;
+static const struct nft_expr_ops nft_flow_offload_ops = {
+ .type = &nft_flow_offload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
+ .eval = nft_flow_offload_eval,
+ .init = nft_flow_offload_init,
+ .activate = nft_flow_offload_activate,
+ .deactivate = nft_flow_offload_deactivate,
+ .destroy = nft_flow_offload_destroy,
+ .validate = nft_flow_offload_validate,
+ .dump = nft_flow_offload_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_flow_offload_type __read_mostly = {
+ .name = "flow_offload",
+ .ops = &nft_flow_offload_ops,
+ .policy = nft_flow_offload_policy,
+ .maxattr = NFTA_FLOW_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nf_flow_table_cleanup(dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int __init nft_flow_offload_module_init(void)
+{
+ int err;
+
+ err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+ if (err)
+ goto err;
+
+ err = nft_register_expr(&nft_flow_offload_type);
+ if (err < 0)
+ goto register_expr;
+
+ return 0;
+
+register_expr:
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
+ return err;
+}
+
+static void __exit nft_flow_offload_module_exit(void)
+{
+ nft_unregister_expr(&nft_flow_offload_type);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+module_init(nft_flow_offload_module_init);
+module_exit(nft_flow_offload_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("flow_offload");
+MODULE_DESCRIPTION("nftables hardware flow offload module");
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
new file mode 100644
index 000000000..7c5876dc9
--- /dev/null
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/netfilter/nf_dup_netdev.h>
+#include <net/neighbour.h>
+#include <net/ip.h>
+
+struct nft_fwd_netdev {
+ u8 sreg_dev;
+};
+
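+/* Transmit the packet out of the interface whose index is read from the
+ * source register, stealing the skb from the netfilter path. skb_iif
+ * and the redirected flag are set up first, which ifb relies on.
+ */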
+static void nft_fwd_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+ struct sk_buff *skb = pkt->skb;
+
+ /* This is used by ifb only. */
+ skb->skb_iif = skb->dev->ifindex;
+ skb_set_redirected(skb, nft_hook(pkt) == NF_NETDEV_INGRESS);
+
+ nf_fwd_netdev_egress(pkt, oif);
+ regs->verdict.code = NF_STOLEN;
+}
+
+static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
+ [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
+ [NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_FWD_NFPROTO] = { .type = NLA_U32 },
+};
+
+static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_FWD_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
+}
+
+static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+ int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
+}
+
+static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
+struct nft_fwd_neigh {
+ u8 sreg_dev;
+ u8 sreg_addr;
+ u8 nfproto;
+};
+
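+/* Router-style forwarding to an explicit neighbour: decrement the
+ * TTL/hop limit, then hand the packet to the ARP or ND table using the
+ * next-hop address from the source register. A mismatched ethertype
+ * falls through to the next rule via NFT_BREAK.
+ */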
+static void nft_fwd_neigh_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+ void *addr = &regs->data[priv->sreg_addr];
+ int oif = regs->data[priv->sreg_dev];
+ unsigned int verdict = NF_STOLEN;
+ struct sk_buff *skb = pkt->skb;
+ struct net_device *dev;
+ int neigh_table;
+
+ switch (priv->nfproto) {
+ case NFPROTO_IPV4: {
+ struct iphdr *iph;
+
+ if (skb->protocol != htons(ETH_P_IP)) {
+ verdict = NFT_BREAK;
+ goto out;
+ }
+ if (skb_try_make_writable(skb, sizeof(*iph))) {
+ verdict = NF_DROP;
+ goto out;
+ }
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+ neigh_table = NEIGH_ARP_TABLE;
+ break;
+ }
+ case NFPROTO_IPV6: {
+ struct ipv6hdr *ip6h;
+
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ verdict = NFT_BREAK;
+ goto out;
+ }
+ if (skb_try_make_writable(skb, sizeof(*ip6h))) {
+ verdict = NF_DROP;
+ goto out;
+ }
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+ neigh_table = NEIGH_ND_TABLE;
+ break;
+ }
+ default:
+ verdict = NFT_BREAK;
+ goto out;
+ }
+
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+ if (dev == NULL)
+ return;
+
+ skb->dev = dev;
+ skb_clear_tstamp(skb);
+ neigh_xmit(neigh_table, dev, addr, skb);
+out:
+ regs->verdict.code = verdict;
+}
+
+static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+ unsigned int addr_len;
+ int err;
+
+ if (!tb[NFTA_FWD_SREG_DEV] ||
+ !tb[NFTA_FWD_SREG_ADDR] ||
+ !tb[NFTA_FWD_NFPROTO])
+ return -EINVAL;
+
+ priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));
+
+ switch (priv->nfproto) {
+ case NFPROTO_IPV4:
+ addr_len = sizeof(struct in_addr);
+ break;
+ case NFPROTO_IPV6:
+ addr_len = sizeof(struct in6_addr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
+ if (err < 0)
+ return err;
+
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ addr_len);
+}
+
+static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev) ||
+ nft_dump_register(skb, NFTA_FWD_SREG_ADDR, priv->sreg_addr) ||
+ nla_put_be32(skb, NFTA_FWD_NFPROTO, htonl(priv->nfproto)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_fwd_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS));
+}
+
+static struct nft_expr_type nft_fwd_netdev_type;
+static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
+ .type = &nft_fwd_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_neigh)),
+ .eval = nft_fwd_neigh_eval,
+ .init = nft_fwd_neigh_init,
+ .dump = nft_fwd_neigh_dump,
+ .validate = nft_fwd_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops nft_fwd_netdev_ops = {
+ .type = &nft_fwd_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)),
+ .eval = nft_fwd_netdev_eval,
+ .init = nft_fwd_netdev_init,
+ .dump = nft_fwd_netdev_dump,
+ .validate = nft_fwd_validate,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_fwd_netdev_offload,
+ .offload_action = nft_fwd_netdev_offload_action,
+};
+
+static const struct nft_expr_ops *
+nft_fwd_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_FWD_SREG_ADDR])
+ return &nft_fwd_neigh_netdev_ops;
+ if (tb[NFTA_FWD_SREG_DEV])
+ return &nft_fwd_netdev_ops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_expr_type nft_fwd_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fwd",
+ .select_ops = nft_fwd_select_ops,
+ .policy = nft_fwd_netdev_policy,
+ .maxattr = NFTA_FWD_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fwd_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fwd_netdev_type);
+}
+
+static void __exit nft_fwd_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fwd_netdev_type);
+}
+
+module_init(nft_fwd_netdev_module_init);
+module_exit(nft_fwd_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000..e5631e88b
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
+
+struct nft_jhash {
+ u8 sreg;
+ u8 dreg;
+ u8 len;
+ bool autogen_seed:1;
+ u32 modulus;
+ u32 seed;
+ u32 offset;
+};
+
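+/* Jenkins-hash the source register and scale the result into the range
+ * [offset, offset + modulus) with reciprocal_scale(), which computes
+ * ((u64)hash * modulus) >> 32 instead of a costly modulo.
+ */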
+static void nft_jhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_jhash *priv = nft_expr_priv(expr);
+ const void *data = &regs->data[priv->sreg];
+ u32 h;
+
+ h = reciprocal_scale(jhash(data, priv->len, priv->seed),
+ priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
+struct nft_symhash {
+ u8 dreg;
+ u32 modulus;
+ u32 offset;
+};
+
+static void nft_symhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 h;
+
+ h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+ [NFTA_HASH_SREG] = { .type = NLA_U32 },
+ [NFTA_HASH_DREG] = { .type = NLA_U32 },
+ [NFTA_HASH_LEN] = { .type = NLA_U32 },
+ [NFTA_HASH_MODULUS] = { .type = NLA_U32 },
+ [NFTA_HASH_SEED] = { .type = NLA_U32 },
+ [NFTA_HASH_OFFSET] = { .type = NLA_U32 },
+ [NFTA_HASH_TYPE] = { .type = NLA_U32 },
+};
+
+static int nft_jhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_jhash *priv = nft_expr_priv(expr);
+ u32 len;
+ int err;
+
+ if (!tb[NFTA_HASH_SREG] ||
+ !tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_LEN] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+ if (len == 0)
+ return -ERANGE;
+
+ priv->len = len;
+
+ err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus < 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ if (tb[NFTA_HASH_SEED]) {
+ priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+ } else {
+ priv->autogen_seed = true;
+ get_random_bytes(&priv->seed, sizeof(priv->seed));
+ }
+
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_symhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (!tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus < 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_jhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (!priv->autogen_seed &&
+ nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_jhash_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_jhash *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, sizeof(u32));
+
+ return false;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
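+/* The jhash variant always cancels register tracking (its seed may be
+ * auto-generated per expression), but two symhash expressions with the
+ * same modulus and offset compute the same value, so the second can be
+ * elided when no bitwise operation touched the register in between.
+ */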
+static bool nft_symhash_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct nft_symhash *symhash;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, sizeof(u32));
+ return false;
+ }
+
+ symhash = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->offset != symhash->offset ||
+ priv->modulus != symhash->modulus) {
+ nft_reg_track_update(track, expr, priv->dreg, sizeof(u32));
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_jhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+ .eval = nft_jhash_eval,
+ .init = nft_jhash_init,
+ .dump = nft_jhash_dump,
+ .reduce = nft_jhash_reduce,
+};
+
+static const struct nft_expr_ops nft_symhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+ .eval = nft_symhash_eval,
+ .init = nft_symhash_init,
+ .dump = nft_symhash_dump,
+ .reduce = nft_symhash_reduce,
+};
+
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_HASH_TYPE])
+ return &nft_jhash_ops;
+
+ type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+ switch (type) {
+ case NFT_HASH_SYM:
+ return &nft_symhash_ops;
+ case NFT_HASH_JENKINS:
+ return &nft_jhash_ops;
+ default:
+ break;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+ .name = "hash",
+ .select_ops = nft_hash_select_ops,
+ .policy = nft_hash_policy,
+ .maxattr = NFTA_HASH_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+ return nft_register_expr(&nft_hash_type);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+ nft_unregister_expr(&nft_hash_type);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
+MODULE_DESCRIPTION("Netfilter nftables hash module");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 000000000..55fcf0280
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+
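+/* "immediate" simply copies a constant into a register. The complexity
+ * below exists because the constant may be a verdict that jumps to a
+ * chain, and bound chains must follow the rule through activation,
+ * deactivation and destruction.
+ */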
+void nft_immediate_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
+}
+
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+ [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 },
+ [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
+};
+
+static enum nft_data_types nft_reg_to_type(const struct nlattr *nla)
+{
+ enum nft_data_types type;
+ u8 reg;
+
+ reg = ntohl(nla_get_be32(nla));
+ if (reg == NFT_REG_VERDICT)
+ type = NFT_DATA_VERDICT;
+ else
+ type = NFT_DATA_VALUE;
+
+ return type;
+}
+
+static int nft_immediate_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc = {
+ .size = sizeof(priv->data),
+ };
+ int err;
+
+ if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+ tb[NFTA_IMMEDIATE_DATA] == NULL)
+ return -EINVAL;
+
+ desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ if (err < 0)
+ return err;
+
+ priv->dlen = desc.len;
+
+ err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG],
+ &priv->dreg, &priv->data, desc.type,
+ desc.len);
+ if (err < 0)
+ goto err1;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ struct nft_chain *chain = priv->data.verdict.chain;
+
+ switch (priv->data.verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ err = nf_tables_bind_chain(ctx, chain);
+ if (err < 0)
+ goto err1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+
+err1:
+ nft_data_release(&priv->data, desc.type);
+ return err;
+}
+
+static void nft_immediate_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_activate(&chain_ctx, rule);
+
+ nft_clear(ctx->net, chain);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
+static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_chain *chain;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nf_tables_unbind_chain(ctx, chain);
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
+ nft_chain_del(chain);
+ chain->bound = false;
+ nft_use_dec(&chain->table->use);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
+ return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_rule *rule, *n;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+
+ if (priv->dreg != NFT_REG_VERDICT)
+ return;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+
+ if (!nft_chain_binding(chain))
+ break;
+
+ /* Rule construction failed, but chain is already bound:
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+ nft_use_dec(&chain->use);
+ break;
+ }
+
+ /* Rule has been deleted, release chain and its rules. */
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ nft_use_dec(&chain->use);
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+ nft_use_dec(&chain->use);
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
+ nf_tables_chain_destroy(&chain_ctx);
+ break;
+ default:
+ break;
+ }
+}
+
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg))
+ goto nla_put_failure;
+
+ return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+ nft_dreg_to_type(priv->dreg), priv->dlen);
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_immediate_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **d)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
+ const struct nft_data *data;
+ int err;
+
+ if (priv->dreg != NFT_REG_VERDICT)
+ return 0;
+
+ data = &priv->data;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ pctx->level++;
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ pctx->level--;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_immediate_expr *priv)
+{
+ struct flow_action_entry *entry;
+ const struct nft_data *data;
+
+ entry = &flow->rule->action.entries[ctx->num_actions++];
+
+ data = &priv->data;
+ switch (data->verdict.code) {
+ case NF_ACCEPT:
+ entry->id = FLOW_ACTION_ACCEPT;
+ break;
+ case NF_DROP:
+ entry->id = FLOW_ACTION_DROP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return nft_immediate_offload_verdict(ctx, flow, priv);
+
+ memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data));
+
+ return 0;
+}
+
+static bool nft_immediate_offload_action(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return true;
+
+ return false;
+}
+
+static bool nft_immediate_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg != NFT_REG_VERDICT)
+ nft_reg_track_cancel(track, priv->dreg, priv->dlen);
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_imm_ops = {
+ .type = &nft_imm_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+ .eval = nft_immediate_eval,
+ .init = nft_immediate_init,
+ .activate = nft_immediate_activate,
+ .deactivate = nft_immediate_deactivate,
+ .destroy = nft_immediate_destroy,
+ .dump = nft_immediate_dump,
+ .validate = nft_immediate_validate,
+ .reduce = nft_immediate_reduce,
+ .offload = nft_immediate_offload,
+ .offload_action = nft_immediate_offload_action,
+};
+
+struct nft_expr_type nft_imm_type __read_mostly = {
+ .name = "immediate",
+ .ops = &nft_imm_ops,
+ .policy = nft_immediate_policy,
+ .maxattr = NFTA_IMMEDIATE_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
new file mode 100644
index 000000000..eaa54964c
--- /dev/null
+++ b/net/netfilter/nft_last.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_last {
+ unsigned long jiffies;
+ unsigned int set;
+};
+
+struct nft_last_priv {
+ struct nft_last *last;
+};
+
+static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
+ [NFTA_LAST_SET] = { .type = NLA_U32 },
+ [NFTA_LAST_MSECS] = { .type = NLA_U64 },
+};
+
+static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ struct nft_last *last;
+ u64 last_jiffies;
+ int err;
+
+ last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
+ if (!last)
+ return -ENOMEM;
+
+ if (tb[NFTA_LAST_SET])
+ last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
+
+ if (last->set && tb[NFTA_LAST_MSECS]) {
+ err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
+ if (err < 0)
+ goto err;
+
+ last->jiffies = jiffies - (unsigned long)last_jiffies;
+ }
+ priv->last = last;
+
+ return 0;
+err:
+ kfree(last);
+
+ return err;
+}
+
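+/* Hot path: stamp the time of the most recent matching packet. The
+ * unlocked READ_ONCE/WRITE_ONCE pairs tolerate concurrent readers, and
+ * the writes are skipped when the stored values are already current to
+ * avoid dirtying the cache line on every packet.
+ */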
+static void nft_last_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ struct nft_last *last = priv->last;
+
+ if (READ_ONCE(last->jiffies) != jiffies)
+ WRITE_ONCE(last->jiffies, jiffies);
+ if (READ_ONCE(last->set) == 0)
+ WRITE_ONCE(last->set, 1);
+}
+
+static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ struct nft_last *last = priv->last;
+ unsigned long last_jiffies = READ_ONCE(last->jiffies);
+ u32 last_set = READ_ONCE(last->set);
+ __be64 msecs;
+
+ if (time_before(jiffies, last_jiffies)) {
+ WRITE_ONCE(last->set, 0);
+ last_set = 0;
+ }
+
+ if (last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
+ else
+ msecs = 0;
+
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
+ nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_last_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+
+ kfree(priv->last);
+}
+
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_last_priv *priv_dst = nft_expr_priv(dst);
+ struct nft_last_priv *priv_src = nft_expr_priv(src);
+
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ if (!priv_dst->last)
+ return -ENOMEM;
+
+ priv_dst->last->set = priv_src->last->set;
+ priv_dst->last->jiffies = priv_src->last->jiffies;
+
+ return 0;
+}
+
+static const struct nft_expr_ops nft_last_ops = {
+ .type = &nft_last_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
+ .eval = nft_last_eval,
+ .init = nft_last_init,
+ .destroy = nft_last_destroy,
+ .clone = nft_last_clone,
+ .dump = nft_last_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+struct nft_expr_type nft_last_type __read_mostly = {
+ .name = "last",
+ .ops = &nft_last_ops,
+ .policy = nft_last_policy,
+ .maxattr = NFTA_LAST_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 000000000..36ded7d43
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_limit {
+ spinlock_t lock;
+ u64 last;
+ u64 tokens;
+};
+
+struct nft_limit_priv {
+ struct nft_limit *limit;
+ u64 tokens_max;
+ u64 rate;
+ u64 nsecs;
+ u32 burst;
+ bool invert;
+};
+
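+/* Token bucket keyed in nanoseconds: the time elapsed since the last
+ * evaluation refills the bucket (capped at tokens_max), and each packet
+ * or byte spends "cost" tokens. The expression matches while the bucket
+ * can pay; with the invert flag it matches only once the rate is
+ * exceeded.
+ */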
+static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
+{
+ u64 now, tokens;
+ s64 delta;
+
+ spin_lock_bh(&priv->limit->lock);
+ now = ktime_get_ns();
+ tokens = priv->limit->tokens + now - priv->limit->last;
+ if (tokens > priv->tokens_max)
+ tokens = priv->tokens_max;
+
+ priv->limit->last = now;
+ delta = tokens - cost;
+ if (delta >= 0) {
+ priv->limit->tokens = delta;
+ spin_unlock_bh(&priv->limit->lock);
+ return priv->invert;
+ }
+ priv->limit->tokens = tokens;
+ spin_unlock_bh(&priv->limit->lock);
+ return !priv->invert;
+}
+
+/* Use same default as in iptables. */
+#define NFT_LIMIT_PKT_BURST_DEFAULT 5
+
+static int nft_limit_init(struct nft_limit_priv *priv,
+ const struct nlattr * const tb[], bool pkts)
+{
+ u64 unit, tokens, rate_with_burst;
+ bool invert = false;
+
+ if (tb[NFTA_LIMIT_RATE] == NULL ||
+ tb[NFTA_LIMIT_UNIT] == NULL)
+ return -EINVAL;
+
+ priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ if (priv->rate == 0)
+ return -EINVAL;
+
+ unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+ if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
+ return -EOVERFLOW;
+
+ if (tb[NFTA_LIMIT_BURST])
+ priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+
+ if (pkts && priv->burst == 0)
+ priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
+
+ if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
+ return -EOVERFLOW;
+
+ if (pkts) {
+ u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+ if (check_mul_overflow(tmp, priv->burst, &tokens))
+ return -EOVERFLOW;
+ } else {
+ u64 tmp;
+
+ /* The token bucket size limits the number of tokens that can
+ * be accumulated. tokens_max specifies the bucket size.
+ * tokens_max = unit * (rate + burst) / rate.
+ */
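+ /* Worked example: rate 1000 bytes over a 1 s unit gives
+ * nsecs = 1e9, so one byte costs 1e9 / 1000 = 1e6 token
+ * nanoseconds; with burst 1000 the bucket size becomes
+ * 1e9 * (1000 + 1000) / 1000 = 2e9 tokens, i.e. two
+ * seconds' worth of traffic at the configured rate.
+ */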
+ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+ return -EOVERFLOW;
+
+ tokens = div64_u64(tmp, priv->rate);
+ }
+
+ if (tb[NFTA_LIMIT_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
+
+ if (flags & ~NFT_LIMIT_F_INV)
+ return -EOPNOTSUPP;
+
+ if (flags & NFT_LIMIT_F_INV)
+ invert = true;
+ }
+
+ priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
+ if (!priv->limit)
+ return -ENOMEM;
+
+ priv->limit->tokens = tokens;
+ priv->tokens_max = priv->limit->tokens;
+ priv->invert = invert;
+ priv->limit->last = ktime_get_ns();
+ spin_lock_init(&priv->limit->lock);
+
+ return 0;
+}
+
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv,
+ enum nft_limit_type type)
+{
+ u32 flags = priv->invert ? NFT_LIMIT_F_INV : 0;
+ u64 secs = div_u64(priv->nsecs, NSEC_PER_SEC);
+
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate),
+ NFTA_LIMIT_PAD) ||
+ nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
+ NFTA_LIMIT_PAD) ||
+ nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(priv->burst)) ||
+ nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) ||
+ nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_limit_destroy(const struct nft_ctx *ctx,
+ const struct nft_limit_priv *priv)
+{
+ kfree(priv->limit);
+}
+
+static int nft_limit_clone(struct nft_limit_priv *priv_dst,
+ const struct nft_limit_priv *priv_src)
+{
+ priv_dst->tokens_max = priv_src->tokens_max;
+ priv_dst->rate = priv_src->rate;
+ priv_dst->nsecs = priv_src->nsecs;
+ priv_dst->burst = priv_src->burst;
+ priv_dst->invert = priv_src->invert;
+
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ if (!priv_dst->limit)
+ return -ENOMEM;
+
+ spin_lock_init(&priv_dst->limit->lock);
+ priv_dst->limit->tokens = priv_src->tokens_max;
+ priv_dst->limit->last = ktime_get_ns();
+
+ return 0;
+}
+
+struct nft_limit_priv_pkts {
+ struct nft_limit_priv limit;
+ u64 cost;
+};
+
+static void nft_limit_pkts_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+ [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
+ [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
+ [NFTA_LIMIT_BURST] = { .type = NLA_U32 },
+ [NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
+ [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_limit_pkts_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb, true);
+ if (err < 0)
+ return err;
+
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
+ struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+
+ priv_dst->cost = priv_src->cost;
+
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+}
+
+static struct nft_expr_type nft_limit_type;
+static const struct nft_expr_ops nft_limit_pkts_ops = {
+ .type = &nft_limit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
+ .eval = nft_limit_pkts_eval,
+ .init = nft_limit_pkts_init,
+ .destroy = nft_limit_pkts_destroy,
+ .clone = nft_limit_pkts_clone,
+ .dump = nft_limit_pkts_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_priv *priv = nft_expr_priv(expr);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+ return nft_limit_init(priv, tb, false);
+}
+
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+ nft_limit_destroy(ctx, priv);
+}
+
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
+ struct nft_limit_priv *priv_src = nft_expr_priv(src);
+
+ return nft_limit_clone(priv_dst, priv_src);
+}
+
+static const struct nft_expr_ops nft_limit_bytes_ops = {
+ .type = &nft_limit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)),
+ .eval = nft_limit_bytes_eval,
+ .init = nft_limit_bytes_init,
+ .dump = nft_limit_bytes_dump,
+ .clone = nft_limit_bytes_clone,
+ .destroy = nft_limit_bytes_destroy,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops *
+nft_limit_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_LIMIT_TYPE] == NULL)
+ return &nft_limit_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_expr_type nft_limit_type __read_mostly = {
+ .name = "limit",
+ .select_ops = nft_limit_select_ops,
+ .policy = nft_limit_policy,
+ .maxattr = NFTA_LIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
+
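+/* The same limiter is also exposed as a stateful object, so one named
+ * limit instance can be shared by several rules instead of each rule
+ * keeping a private bucket.
+ */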
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb, true);
+ if (err < 0)
+ return err;
+
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+ .type = &nft_limit_obj_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
+ .init = nft_limit_obj_pkts_init,
+ .destroy = nft_limit_obj_pkts_destroy,
+ .eval = nft_limit_obj_pkts_eval,
+ .dump = nft_limit_obj_pkts_dump,
+};
+
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ return nft_limit_init(priv, tb, false);
+}
+
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, priv);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+ .type = &nft_limit_obj_type,
+ .size = sizeof(struct nft_limit_priv),
+ .init = nft_limit_obj_bytes_init,
+ .destroy = nft_limit_obj_bytes_destroy,
+ .eval = nft_limit_obj_bytes_eval,
+ .dump = nft_limit_obj_bytes_dump,
+};
+
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (!tb[NFTA_LIMIT_TYPE])
+ return &nft_limit_obj_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_obj_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_obj_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+ .select_ops = nft_limit_obj_select_ops,
+ .type = NFT_OBJECT_LIMIT,
+ .maxattr = NFTA_LIMIT_MAX,
+ .policy = nft_limit_policy,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_limit_module_init(void)
+{
+ int err;
+
+ err = nft_register_obj(&nft_limit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_limit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_limit_obj_type);
+ return err;
+}
+
+static void __exit nft_limit_module_exit(void)
+{
+ nft_unregister_expr(&nft_limit_type);
+ nft_unregister_obj(&nft_limit_obj_type);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
+MODULE_DESCRIPTION("nftables limit expression support");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 000000000..0e13c003f
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/audit.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+static const char *nft_log_null_prefix = "";
+
+struct nft_log {
+ struct nf_loginfo loginfo;
+ char *prefix;
+};
+
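+/*
+ * Helpers for the audit log path: fetch the network header via
+ * skb_header_pointer() (safe on non-linear skbs) and append
+ * saddr/daddr/proto to the audit record. The IPv6 variant skips
+ * extension headers so the final transport protocol is reported.
+ */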
+static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph);
+ if (!ih)
+ return false;
+
+ audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu",
+ &ih->saddr, &ih->daddr, ih->protocol);
+
+ return true;
+}
+
+static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+ if (!ih)
+ return false;
+
+ nexthdr = ih->nexthdr;
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off);
+
+ audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+ &ih->saddr, &ih->daddr, nexthdr);
+
+ return true;
+}
+
+static void nft_log_eval_audit(const struct nft_pktinfo *pkt)
+{
+ struct sk_buff *skb = pkt->skb;
+ struct audit_buffer *ab;
+ int fam = -1;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ if (!ab)
+ return;
+
+ audit_log_format(ab, "mark=%#x", skb->mark);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_BRIDGE:
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case htons(ETH_P_IPV6):
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+ break;
+ case NFPROTO_IPV4:
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case NFPROTO_IPV6:
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+
+ if (fam == -1)
+ audit_log_format(ab, " saddr=? daddr=? proto=-1");
+
+ audit_log_end(ab);
+}
+
+static void nft_log_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_log *priv = nft_expr_priv(expr);
+
+ if (priv->loginfo.type == NF_LOG_TYPE_LOG &&
+ priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) {
+ nft_log_eval_audit(pkt);
+ return;
+ }
+
+ nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
+ nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
+ priv->prefix);
+}
+
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+ [NFTA_LOG_GROUP] = { .type = NLA_U16 },
+ [NFTA_LOG_PREFIX] = { .type = NLA_STRING,
+ .len = NF_LOG_PREFIXLEN - 1 },
+ [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
+ [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
+ [NFTA_LOG_LEVEL] = { .type = NLA_U32 },
+ [NFTA_LOG_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_log_modprobe(struct net *net, enum nf_log_type t)
+{
+ switch (t) {
+ case NF_LOG_TYPE_LOG:
+ return nft_request_module(net, "%s", "nf_log_syslog");
+ case NF_LOG_TYPE_ULOG:
+ return nft_request_module(net, "%s", "nfnetlink_log");
+ case NF_LOG_TYPE_MAX:
+ break;
+ }
+
+ return -ENOENT;
+}
+
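+/*
+ * NFTA_LOG_LEVEL and NFTA_LOG_GROUP are mutually exclusive: a group
+ * attribute selects queueing to userspace via nfnetlink_log (ULOG),
+ * anything else is syslog-style logging. The special AUDIT level is
+ * handled in-line by nft_log_eval_audit() and needs no logger
+ * backend at all.
+ */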
+static int nft_log_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_log *priv = nft_expr_priv(expr);
+ struct nf_loginfo *li = &priv->loginfo;
+ const struct nlattr *nla;
+ int err;
+
+ li->type = NF_LOG_TYPE_LOG;
+ if (tb[NFTA_LOG_LEVEL] != NULL &&
+ tb[NFTA_LOG_GROUP] != NULL)
+ return -EINVAL;
+ if (tb[NFTA_LOG_GROUP] != NULL) {
+ li->type = NF_LOG_TYPE_ULOG;
+ if (tb[NFTA_LOG_FLAGS] != NULL)
+ return -EINVAL;
+ }
+
+ nla = tb[NFTA_LOG_PREFIX];
+ if (nla != NULL) {
+ priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+ if (priv->prefix == NULL)
+ return -ENOMEM;
+ nla_strscpy(priv->prefix, nla, nla_len(nla) + 1);
+ } else {
+ priv->prefix = (char *)nft_log_null_prefix;
+ }
+
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (tb[NFTA_LOG_LEVEL] != NULL) {
+ li->u.log.level =
+ ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
+ } else {
+ li->u.log.level = NFT_LOGLEVEL_WARNING;
+ }
+ if (li->u.log.level > NFT_LOGLEVEL_AUDIT) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ if (tb[NFTA_LOG_FLAGS] != NULL) {
+ li->u.log.logflags =
+ ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+ if (li->u.log.logflags & ~NF_LOG_MASK) {
+ err = -EINVAL;
+ goto err1;
+ }
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
+ li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+ if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+ li->u.ulog.flags |= NF_LOG_F_COPY_LEN;
+ li->u.ulog.copy_len =
+ ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+ }
+ if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+ li->u.ulog.qthreshold =
+ ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ }
+ break;
+ }
+
+ if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
+ return 0;
+
+ err = nf_logger_find_get(ctx->family, li->type);
+ if (err < 0) {
+ if (nft_log_modprobe(ctx->net, li->type) == -EAGAIN)
+ err = -EAGAIN;
+
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ if (priv->prefix != nft_log_null_prefix)
+ kfree(priv->prefix);
+ return err;
+}
+
+static void nft_log_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_log *priv = nft_expr_priv(expr);
+ struct nf_loginfo *li = &priv->loginfo;
+
+ if (priv->prefix != nft_log_null_prefix)
+ kfree(priv->prefix);
+
+ if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
+ return;
+
+ nf_logger_put(ctx->family, li->type);
+}
+
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_log *priv = nft_expr_priv(expr);
+ const struct nf_loginfo *li = &priv->loginfo;
+
+ if (priv->prefix != nft_log_null_prefix)
+ if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+ goto nla_put_failure;
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level)))
+ goto nla_put_failure;
+
+ if (li->u.log.logflags) {
+ if (nla_put_be32(skb, NFTA_LOG_FLAGS,
+ htonl(li->u.log.logflags)))
+ goto nla_put_failure;
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
+ if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+ goto nla_put_failure;
+
+ if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
+ if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+ htonl(li->u.ulog.copy_len)))
+ goto nla_put_failure;
+ }
+ if (li->u.ulog.qthreshold) {
+ if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+ htons(li->u.ulog.qthreshold)))
+ goto nla_put_failure;
+ }
+ break;
+ }
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_log_type;
+static const struct nft_expr_ops nft_log_ops = {
+ .type = &nft_log_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_log)),
+ .eval = nft_log_eval,
+ .init = nft_log_init,
+ .destroy = nft_log_destroy,
+ .dump = nft_log_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_log_type __read_mostly = {
+ .name = "log",
+ .ops = &nft_log_ops,
+ .policy = nft_log_policy,
+ .maxattr = NFTA_LOG_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_log_module_init(void)
+{
+ return nft_register_expr(&nft_log_type);
+}
+
+static void __exit nft_log_module_exit(void)
+{
+ nft_unregister_expr(&nft_log_type);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
+MODULE_DESCRIPTION("Netfilter nf_tables log module");
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
new file mode 100644
index 000000000..68a5dea80
--- /dev/null
+++ b/net/netfilter/nft_lookup.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_lookup {
+ struct nft_set *set;
+ u8 sreg;
+ u8 dreg;
+ bool invert;
+ struct nft_set_binding binding;
+};
+
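+/*
+ * With retpolines enabled, indirect calls are comparatively
+ * expensive. Dispatch set lookups by comparing set->ops against each
+ * built-in backend so the common cases become direct calls; the
+ * indirect call only remains as a (warned) fallback.
+ */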
+#ifdef CONFIG_RETPOLINE
+bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ if (set->ops == &nft_set_hash_fast_type.ops)
+ return nft_hash_lookup_fast(net, set, key, ext);
+ if (set->ops == &nft_set_hash_type.ops)
+ return nft_hash_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_rhash_type.ops)
+ return nft_rhash_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_bitmap_type.ops)
+ return nft_bitmap_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_pipapo_type.ops)
+ return nft_pipapo_lookup(net, set, key, ext);
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ if (set->ops == &nft_set_pipapo_avx2_type.ops)
+ return nft_pipapo_avx2_lookup(net, set, key, ext);
+#endif
+
+ if (set->ops == &nft_set_rbtree_type.ops)
+ return nft_rbtree_lookup(net, set, key, ext);
+
+ WARN_ON_ONCE(1);
+ return set->ops->lookup(net, set, key, ext);
+}
+EXPORT_SYMBOL_GPL(nft_set_do_lookup);
+#endif
+
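+/*
+ * Rule evaluation: the lookup result is XORed with the optional
+ * inversion flag; on a miss, a catchall element may still match.
+ * For map sets, the data bound to the matching element is copied
+ * into the destination register.
+ */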
+void nft_lookup_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+ const struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext = NULL;
+ const struct net *net = nft_net(pkt);
+ bool found;
+
+ found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
+ priv->invert;
+ if (!found) {
+ ext = nft_set_catchall_lookup(net, set);
+ if (!ext) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ }
+
+ if (ext) {
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
+
+ nft_set_elem_update_expr(ext, regs, pkt);
+ }
+}
+
+static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
+ [NFTA_LOOKUP_SET] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_lookup_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set *set;
+ u32 flags;
+ int err;
+
+ if (tb[NFTA_LOOKUP_SET] == NULL ||
+ tb[NFTA_LOOKUP_SREG] == NULL)
+ return -EINVAL;
+
+ set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ set->klen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_LOOKUP_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
+
+ if (flags & ~NFT_LOOKUP_F_INV)
+ return -EINVAL;
+
+ if (flags & NFT_LOOKUP_F_INV) {
+ if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+ priv->invert = true;
+ }
+ }
+
+ if (tb[NFTA_LOOKUP_DREG] != NULL) {
+ if (priv->invert)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_MAP))
+ return -EINVAL;
+
+ err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
+ &priv->dreg, NULL, set->dtype,
+ set->dlen);
+ if (err < 0)
+ return err;
+ } else if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ priv->binding.flags = set->flags & NFT_SET_MAP;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ return err;
+
+ priv->set = set;
+ return 0;
+}
+
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ nf_tables_activate_set(ctx, priv->set);
+}
+
+static void nft_lookup_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+ nf_tables_destroy_set(ctx, priv->set);
+}
+
+static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0;
+
+ if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (priv->set->flags & NFT_SET_MAP)
+ if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_lookup_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **d)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+ struct nft_set_iter iter;
+
+ if (!(priv->set->flags & NFT_SET_MAP) ||
+ priv->set->dtype != NFT_DATA_VERDICT)
+ return 0;
+
+ iter.genmask = nft_genmask_next(ctx->net);
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nft_setelem_validate;
+
+ priv->set->ops->walk(ctx, priv->set, &iter);
+ if (!iter.err)
+ iter.err = nft_set_catchall_validate(ctx, priv->set);
+
+ if (iter.err < 0)
+ return iter.err;
+
+ return 0;
+}
+
+static bool nft_lookup_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+
+ if (priv->set->flags & NFT_SET_MAP)
+ nft_reg_track_cancel(track, priv->dreg, priv->set->dlen);
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_lookup_ops = {
+ .type = &nft_lookup_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
+ .eval = nft_lookup_eval,
+ .init = nft_lookup_init,
+ .activate = nft_lookup_activate,
+ .deactivate = nft_lookup_deactivate,
+ .destroy = nft_lookup_destroy,
+ .dump = nft_lookup_dump,
+ .validate = nft_lookup_validate,
+ .reduce = nft_lookup_reduce,
+};
+
+struct nft_expr_type nft_lookup_type __read_mostly = {
+ .name = "lookup",
+ .ops = &nft_lookup_ops,
+ .policy = nft_lookup_policy,
+ .maxattr = NFTA_LOOKUP_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
new file mode 100644
index 000000000..026b4f87d
--- /dev/null
+++ b/net/netfilter/nft_masq.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_masquerade.h>
+
+struct nft_masq {
+ u32 flags;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
+};
+
+static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+ [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
+};
+
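+/*
+ * Masquerading only makes sense for source NAT on the way out, so
+ * restrict the expression to NAT chains hooked at postrouting.
+ */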
+static int nft_masq_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ int err;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_POST_ROUTING));
+}
+
+static int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ u32 plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ struct nft_masq *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_MASQ_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+ }
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_masq *priv = nft_expr_priv(expr);
+
+ if (priv->flags != 0 &&
+ nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+
+ if (priv->sreg_proto_min) {
+ if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX,
+ priv->sreg_proto_max))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ }
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+ &range, nft_out(pkt));
+}
+
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+ .type = &nft_masq_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv4_eval,
+ .init = nft_masq_init,
+ .destroy = nft_masq_ipv4_destroy,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "masq",
+ .ops = &nft_masq_ipv4_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ }
+ regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+ nft_out(pkt));
+}
+
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+ .type = &nft_masq_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv6_eval,
+ .init = nft_masq_init,
+ .destroy = nft_masq_ipv6_destroy,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "masq",
+ .ops = &nft_masq_ipv6_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_ipv6(void)
+{
+ return nft_register_expr(&nft_masq_ipv6_type);
+}
+
+static void nft_masq_module_exit_ipv6(void)
+{
+ nft_unregister_expr(&nft_masq_ipv6_type);
+}
+#else
+static inline int nft_masq_module_init_ipv6(void) { return 0; }
+static inline void nft_masq_module_exit_ipv6(void) {}
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static void nft_masq_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ return nft_masq_ipv4_eval(expr, regs, pkt);
+ case NFPROTO_IPV6:
+ return nft_masq_ipv6_eval(expr, regs, pkt);
+ }
+
+ WARN_ON_ONCE(1);
+}
+
+static void
+nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_INET);
+}
+
+static struct nft_expr_type nft_masq_inet_type;
+static const struct nft_expr_ops nft_masq_inet_ops = {
+ .type = &nft_masq_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_inet_eval,
+ .init = nft_masq_init,
+ .destroy = nft_masq_inet_destroy,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_masq_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "masq",
+ .ops = &nft_masq_inet_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_inet(void)
+{
+ return nft_register_expr(&nft_masq_inet_type);
+}
+
+static void nft_masq_module_exit_inet(void)
+{
+ nft_unregister_expr(&nft_masq_inet_type);
+}
+#else
+static inline int nft_masq_module_init_inet(void) { return 0; }
+static inline void nft_masq_module_exit_inet(void) {}
+#endif
+
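+/*
+ * Register the ipv6 and inet flavours first (no-ops when compiled
+ * out), then the ipv4 expression and the masquerade notifiers,
+ * unwinding whatever has already been registered on failure.
+ */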
+static int __init nft_masq_module_init(void)
+{
+ int ret;
+
+ ret = nft_masq_module_init_ipv6();
+ if (ret < 0)
+ return ret;
+
+ ret = nft_masq_module_init_inet();
+ if (ret < 0) {
+ nft_masq_module_exit_ipv6();
+ return ret;
+ }
+
+ ret = nft_register_expr(&nft_masq_ipv4_type);
+ if (ret < 0) {
+ nft_masq_module_exit_inet();
+ nft_masq_module_exit_ipv6();
+ return ret;
+ }
+
+ ret = nf_nat_masquerade_inet_register_notifiers();
+ if (ret < 0) {
+ nft_masq_module_exit_ipv6();
+ nft_masq_module_exit_inet();
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit nft_masq_module_exit(void)
+{
+ nft_masq_module_exit_ipv6();
+ nft_masq_module_exit_inet();
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ nf_nat_masquerade_inet_unregister_notifiers();
+}
+
+module_init(nft_masq_module_init);
+module_exit(nft_masq_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_EXPR("masq");
+MODULE_DESCRIPTION("Netfilter nftables masquerade expression support");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 000000000..6e8332192
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2014 Intel Corporation
+ * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/static_key.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nft_meta.h>
+#include <net/netfilter/nf_tables_offload.h>
+
+#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+
+#define NFT_META_SECS_PER_MINUTE 60
+#define NFT_META_SECS_PER_HOUR 3600
+#define NFT_META_SECS_PER_DAY 86400
+#define NFT_META_DAYS_PER_WEEK 7
+
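+/*
+ * Day-of-week from wall-clock time, adjusted for the local timezone.
+ * The Unix epoch (1970-01-01) fell on a Thursday, hence the +4
+ * offset on the days-since-epoch count.
+ */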
+static u8 nft_meta_weekday(void)
+{
+ time64_t secs = ktime_get_real_seconds();
+ unsigned int dse;
+ u8 wday;
+
+ secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
+ dse = div_u64(secs, NFT_META_SECS_PER_DAY);
+ wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
+
+ return wday;
+}
+
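+/*
+ * Despite the name, this returns the number of seconds elapsed since
+ * local midnight, which lets rules match time-of-day ranges with
+ * second granularity.
+ */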
+static u32 nft_meta_hour(time64_t secs)
+{
+ struct tm tm;
+
+ time64_to_tm(secs, 0, &tm);
+
+ return tm.tm_hour * NFT_META_SECS_PER_HOUR
+ + tm.tm_min * NFT_META_SECS_PER_MINUTE
+ + tm.tm_sec;
+}
+
+static noinline_for_stack void
+nft_meta_get_eval_time(enum nft_meta_keys key,
+ u32 *dest)
+{
+ switch (key) {
+ case NFT_META_TIME_NS:
+ nft_reg_store64((u64 *)dest, ktime_get_real_ns());
+ break;
+ case NFT_META_TIME_DAY:
+ nft_reg_store8(dest, nft_meta_weekday());
+ break;
+ case NFT_META_TIME_HOUR:
+ *dest = nft_meta_hour(ktime_get_real_seconds());
+ break;
+ default:
+ break;
+ }
+}
+
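+/*
+ * Slow path for looped-back packets, where the original packet type
+ * is no longer available: reconstruct it from the destination
+ * address. IPv6 needs no broadcast case since the protocol has no
+ * broadcast addresses.
+ */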
+static noinline bool
+nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt,
+ u32 *dest)
+{
+ const struct sk_buff *skb = pkt->skb;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ else
+ nft_reg_store8(dest, PACKET_BROADCAST);
+ break;
+ case NFPROTO_IPV6:
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ break;
+ case NFPROTO_NETDEV:
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ int noff = skb_network_offset(skb);
+ struct iphdr *iph, _iph;
+
+ iph = skb_header_pointer(skb, noff,
+ sizeof(_iph), &_iph);
+ if (!iph)
+ return false;
+
+ if (ipv4_is_multicast(iph->daddr))
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ else
+ nft_reg_store8(dest, PACKET_BROADCAST);
+
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ return true;
+}
+
+static noinline bool
+nft_meta_get_eval_skugid(enum nft_meta_keys key,
+ u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ struct sock *sk = skb_to_full_sk(pkt->skb);
+ struct socket *sock;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ return false;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ sock = sk->sk_socket;
+ if (!sock || !sock->file) {
+ read_unlock_bh(&sk->sk_callback_lock);
+ return false;
+ }
+
+ switch (key) {
+ case NFT_META_SKUID:
+ *dest = from_kuid_munged(sock_net(sk)->user_ns,
+ sock->file->f_cred->fsuid);
+ break;
+ case NFT_META_SKGID:
+ *dest = from_kgid_munged(sock_net(sk)->user_ns,
+ sock->file->f_cred->fsgid);
+ break;
+ default:
+ break;
+ }
+
+ read_unlock_bh(&sk->sk_callback_lock);
+ return true;
+}
+
+#ifdef CONFIG_CGROUP_NET_CLASSID
+static noinline bool
+nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt)
+{
+ struct sock *sk = skb_to_full_sk(pkt->skb);
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ return false;
+
+ *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
+ return true;
+}
+#endif
+
+static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
+ u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
+
+ switch (key) {
+ case NFT_META_IIFKIND:
+ if (!in || !in->rtnl_link_ops)
+ return false;
+ strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
+ case NFT_META_OIFKIND:
+ if (!out || !out->rtnl_link_ops)
+ return false;
+ strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
+{
+ *dest = dev ? dev->ifindex : 0;
+}
+
+static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
+{
+ strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+}
+
+static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
+{
+ if (!dev)
+ return false;
+
+ nft_reg_store16(dest, dev->type);
+ return true;
+}
+
+static bool nft_meta_store_ifgroup(u32 *dest, const struct net_device *dev)
+{
+ if (!dev)
+ return false;
+
+ *dest = dev->group;
+ return true;
+}
+
+static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ switch (key) {
+ case NFT_META_IIFNAME:
+ nft_meta_store_ifname(dest, nft_in(pkt));
+ break;
+ case NFT_META_OIFNAME:
+ nft_meta_store_ifname(dest, nft_out(pkt));
+ break;
+ case NFT_META_IIF:
+ nft_meta_store_ifindex(dest, nft_in(pkt));
+ break;
+ case NFT_META_OIF:
+ nft_meta_store_ifindex(dest, nft_out(pkt));
+ break;
+ case NFT_META_IFTYPE:
+ if (!nft_meta_store_iftype(dest, pkt->skb->dev))
+ return false;
+ break;
+ case __NFT_META_IIFTYPE:
+ if (!nft_meta_store_iftype(dest, nft_in(pkt)))
+ return false;
+ break;
+ case NFT_META_OIFTYPE:
+ if (!nft_meta_store_iftype(dest, nft_out(pkt)))
+ return false;
+ break;
+ case NFT_META_IIFGROUP:
+ if (!nft_meta_store_ifgroup(dest, nft_in(pkt)))
+ return false;
+ break;
+ case NFT_META_OIFGROUP:
+ if (!nft_meta_store_ifgroup(dest, nft_out(pkt)))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef CONFIG_IP_ROUTE_CLASSID
+static noinline bool
+nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
+{
+ const struct dst_entry *dst = skb_dst(skb);
+
+ if (!dst)
+ return false;
+
+ *dest = dst->tclassid;
+ return true;
+}
+#endif
+
+static noinline u32 nft_meta_get_eval_sdif(const struct nft_pktinfo *pkt)
+{
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ return inet_sdif(pkt->skb);
+ case NFPROTO_IPV6:
+ return inet6_sdif(pkt->skb);
+ }
+
+ return 0;
+}
+
+static noinline void
+nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt)
+{
+ u32 sdif = nft_meta_get_eval_sdif(pkt);
+ const struct net_device *dev;
+
+ dev = sdif ? dev_get_by_index_rcu(nft_net(pkt), sdif) : NULL;
+ nft_meta_store_ifname(dest, dev);
+}
+
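+/*
+ * Main dispatcher for meta key retrieval: copy the requested key
+ * into the destination register. Any key that cannot be resolved for
+ * this packet ends rule evaluation with NFT_BREAK.
+ */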
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+
+ switch (priv->key) {
+ case NFT_META_LEN:
+ *dest = skb->len;
+ break;
+ case NFT_META_PROTOCOL:
+ nft_reg_store16(dest, (__force u16)skb->protocol);
+ break;
+ case NFT_META_NFPROTO:
+ nft_reg_store8(dest, nft_pf(pkt));
+ break;
+ case NFT_META_L4PROTO:
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ goto err;
+ nft_reg_store8(dest, pkt->tprot);
+ break;
+ case NFT_META_PRIORITY:
+ *dest = skb->priority;
+ break;
+ case NFT_META_MARK:
+ *dest = skb->mark;
+ break;
+ case NFT_META_IIF:
+ case NFT_META_OIF:
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ case NFT_META_IIFGROUP:
+ case NFT_META_OIFGROUP:
+ if (!nft_meta_get_eval_ifname(priv->key, dest, pkt))
+ goto err;
+ break;
+ case NFT_META_SKUID:
+ case NFT_META_SKGID:
+ if (!nft_meta_get_eval_skugid(priv->key, dest, pkt))
+ goto err;
+ break;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_META_RTCLASSID:
+ if (!nft_meta_get_eval_rtclassid(skb, dest))
+ goto err;
+ break;
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+ *dest = skb->secmark;
+ break;
+#endif
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != PACKET_LOOPBACK) {
+ nft_reg_store8(dest, skb->pkt_type);
+ break;
+ }
+
+ if (!nft_meta_get_eval_pkttype_lo(pkt, dest))
+ goto err;
+ break;
+ case NFT_META_CPU:
+ *dest = raw_smp_processor_id();
+ break;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case NFT_META_CGROUP:
+ if (!nft_meta_get_eval_cgroup(dest, pkt))
+ goto err;
+ break;
+#endif
+ case NFT_META_PRANDOM:
+ *dest = get_random_u32();
+ break;
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ nft_reg_store8(dest, secpath_exists(skb));
+ break;
+#endif
+ case NFT_META_IIFKIND:
+ case NFT_META_OIFKIND:
+ if (!nft_meta_get_eval_kind(priv->key, dest, pkt))
+ goto err;
+ break;
+ case NFT_META_TIME_NS:
+ case NFT_META_TIME_DAY:
+ case NFT_META_TIME_HOUR:
+ nft_meta_get_eval_time(priv->key, dest);
+ break;
+ case NFT_META_SDIF:
+ *dest = nft_meta_get_eval_sdif(pkt);
+ break;
+ case NFT_META_SDIFNAME:
+ nft_meta_get_eval_sdifname(dest, pkt);
+ break;
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ return;
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
+
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *meta = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 *sreg = &regs->data[meta->sreg];
+ u32 value = *sreg;
+ u8 value8;
+
+ switch (meta->key) {
+ case NFT_META_MARK:
+ skb->mark = value;
+ break;
+ case NFT_META_PRIORITY:
+ skb->priority = value;
+ break;
+ case NFT_META_PKTTYPE:
+ value8 = nft_reg_load8(sreg);
+
+ if (skb->pkt_type != value8 &&
+ skb_pkt_type_ok(value8) &&
+ skb_pkt_type_ok(skb->pkt_type))
+ skb->pkt_type = value8;
+ break;
+ case NFT_META_NFTRACE:
+ value8 = nft_reg_load8(sreg);
+
+ skb->nf_trace = !!value8;
+ break;
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+ skb->secmark = value;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ }
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
+
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+ [NFTA_META_DREG] = { .type = NLA_U32 },
+ [NFTA_META_KEY] = { .type = NLA_U32 },
+ [NFTA_META_SREG] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
+
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_PROTOCOL:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ len = sizeof(u16);
+ break;
+ case NFT_META_NFPROTO:
+ case NFT_META_L4PROTO:
+ case NFT_META_LEN:
+ case NFT_META_PRIORITY:
+ case NFT_META_MARK:
+ case NFT_META_IIF:
+ case NFT_META_OIF:
+ case NFT_META_SDIF:
+ case NFT_META_SKUID:
+ case NFT_META_SKGID:
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+#endif
+ case NFT_META_PKTTYPE:
+ case NFT_META_CPU:
+ case NFT_META_IIFGROUP:
+ case NFT_META_OIFGROUP:
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case NFT_META_CGROUP:
+#endif
+ len = sizeof(u32);
+ break;
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ case NFT_META_IIFKIND:
+ case NFT_META_OIFKIND:
+ case NFT_META_SDIFNAME:
+ len = IFNAMSIZ;
+ break;
+ case NFT_META_PRANDOM:
+ len = sizeof(u32);
+ break;
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ len = sizeof(u8);
+ break;
+#endif
+ case NFT_META_TIME_NS:
+ len = sizeof(u64);
+ break;
+ case NFT_META_TIME_DAY:
+ len = sizeof(u8);
+ break;
+ case NFT_META_TIME_HOUR:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->len = len;
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
+
+static int nft_meta_get_validate_sdif(const struct nft_ctx *ctx)
+{
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
+{
+#ifdef CONFIG_XFRM
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+#else
+ return 0;
+#endif
+}
+
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ switch (priv->key) {
+ case NFT_META_SECPATH:
+ return nft_meta_get_validate_xfrm(ctx);
+ case NFT_META_SDIF:
+ case NFT_META_SDIFNAME:
+ return nft_meta_get_validate_sdif(ctx);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (priv->key != NFT_META_PKTTYPE)
+ return 0;
+
+ switch (ctx->family) {
+ case NFPROTO_BRIDGE:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = 1 << NF_INET_PRE_ROUTING;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+#endif
+ len = sizeof(u32);
+ break;
+ case NFT_META_NFTRACE:
+ len = sizeof(u8);
+ break;
+ case NFT_META_PKTTYPE:
+ len = sizeof(u8);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_inc(&nft_trace_enabled);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
+
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
+
+int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
+
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_dec(&nft_trace_enabled);
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
+static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->key) {
+ case NFT_META_PROTOCOL:
+ NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
+ sizeof(__u16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case NFT_META_L4PROTO:
+ NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+ sizeof(__u8), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+ break;
+ case NFT_META_IIF:
+ NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_ifindex, sizeof(__u32), reg);
+ break;
+ case NFT_META_IIFTYPE:
+ NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_iftype, sizeof(__u16), reg);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+bool nft_meta_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct nft_meta *meta;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ meta = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != meta->key ||
+ priv->dreg != meta->dreg) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+EXPORT_SYMBOL_GPL(nft_meta_get_reduce);
+
+static const struct nft_expr_ops nft_meta_get_ops = {
+ .type = &nft_meta_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_get_eval,
+ .init = nft_meta_get_init,
+ .dump = nft_meta_get_dump,
+ .reduce = nft_meta_get_reduce,
+ .validate = nft_meta_get_validate,
+ .offload = nft_meta_get_offload,
+};
+
+static bool nft_meta_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_meta_get_ops)
+ continue;
+
+ __nft_reg_track_cancel(track, i);
+ }
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_meta_set_ops = {
+ .type = &nft_meta_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_set_eval,
+ .init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
+ .dump = nft_meta_set_dump,
+ .reduce = nft_meta_set_reduce,
+ .validate = nft_meta_set_validate,
+};
+
+static const struct nft_expr_ops *
+nft_meta_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_META_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+ return ERR_PTR(-EINVAL);
+
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
+ if (ctx->family == NFPROTO_BRIDGE)
+ return ERR_PTR(-EAGAIN);
+#endif
+ if (tb[NFTA_META_DREG])
+ return &nft_meta_get_ops;
+
+ if (tb[NFTA_META_SREG])
+ return &nft_meta_set_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+struct nft_expr_type nft_meta_type __read_mostly = {
+ .name = "meta",
+ .select_ops = nft_meta_select_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NETWORK_SECMARK
+struct nft_secmark {
+ u32 secid;
+ char *ctx;
+};
+
+static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
+ [NFTA_SECMARK_CTX] = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
+};
+
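+/*
+ * Resolve the configured security context string to a secid through
+ * the LSM and verify that relabeling packets with it is permitted.
+ * An unknown context yields -ENOENT.
+ */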
+static int nft_secmark_compute_secid(struct nft_secmark *priv)
+{
+ u32 tmp_secid = 0;
+ int err;
+
+ err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid);
+ if (err)
+ return err;
+
+ if (!tmp_secid)
+ return -ENOENT;
+
+ err = security_secmark_relabel_packet(tmp_secid);
+ if (err)
+ return err;
+
+ priv->secid = tmp_secid;
+ return 0;
+}
+
+static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_secmark *priv = nft_obj_data(obj);
+ struct sk_buff *skb = pkt->skb;
+
+ skb->secmark = priv->secid;
+}
+
+static int nft_secmark_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_secmark *priv = nft_obj_data(obj);
+ int err;
+
+ if (tb[NFTA_SECMARK_CTX] == NULL)
+ return -EINVAL;
+
+ priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
+ if (!priv->ctx)
+ return -ENOMEM;
+
+ err = nft_secmark_compute_secid(priv);
+ if (err) {
+ kfree(priv->ctx);
+ return err;
+ }
+
+ security_secmark_refcount_inc();
+
+ return 0;
+}
+
+static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+ bool reset)
+{
+ struct nft_secmark *priv = nft_obj_data(obj);
+ int err;
+
+ if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx))
+ return -1;
+
+ if (reset) {
+ err = nft_secmark_compute_secid(priv);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+{
+ struct nft_secmark *priv = nft_obj_data(obj);
+
+ security_secmark_refcount_dec();
+
+ kfree(priv->ctx);
+}
+
+static const struct nft_object_ops nft_secmark_obj_ops = {
+ .type = &nft_secmark_obj_type,
+ .size = sizeof(struct nft_secmark),
+ .init = nft_secmark_obj_init,
+ .eval = nft_secmark_obj_eval,
+ .dump = nft_secmark_obj_dump,
+ .destroy = nft_secmark_obj_destroy,
+};
+struct nft_object_type nft_secmark_obj_type __read_mostly = {
+ .type = NFT_OBJECT_SECMARK,
+ .ops = &nft_secmark_obj_ops,
+ .maxattr = NFTA_SECMARK_MAX,
+ .policy = nft_secmark_policy,
+ .owner = THIS_MODULE,
+};
+#endif /* CONFIG_NETWORK_SECMARK */
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
new file mode 100644
index 000000000..ba7bcce72
--- /dev/null
+++ b/net/netfilter/nft_nat.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/string.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/ip.h>
+
+struct nft_nat {
+ u8 sreg_addr_min;
+ u8 sreg_addr_max;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
+ enum nf_nat_manip_type type:8;
+ u8 family;
+ u16 flags;
+};
+
+static void nft_nat_setup_addr(struct nf_nat_range2 *range,
+ const struct nft_regs *regs,
+ const struct nft_nat *priv)
+{
+ switch (priv->family) {
+ case AF_INET:
+ range->min_addr.ip = (__force __be32)
+ regs->data[priv->sreg_addr_min];
+ range->max_addr.ip = (__force __be32)
+ regs->data[priv->sreg_addr_max];
+ break;
+ case AF_INET6:
+ memcpy(range->min_addr.ip6, &regs->data[priv->sreg_addr_min],
+ sizeof(range->min_addr.ip6));
+ memcpy(range->max_addr.ip6, &regs->data[priv->sreg_addr_max],
+ sizeof(range->max_addr.ip6));
+ break;
+ }
+}
+
+static void nft_nat_setup_proto(struct nf_nat_range2 *range,
+ const struct nft_regs *regs,
+ const struct nft_nat *priv)
+{
+ range->min_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_min]);
+ range->max_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_max]);
+}
+
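+/*
+ * NETMAP-style 1:1 prefix translation: keep the host bits of the
+ * original address and replace the network bits with those of the
+ * configured range. The mask is derived per 32-bit word as
+ * ~(min ^ max), which also covers IPv4 via the first ip6[] word.
+ */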
+static void nft_nat_setup_netmap(struct nf_nat_range2 *range,
+ const struct nft_pktinfo *pkt,
+ const struct nft_nat *priv)
+{
+ struct sk_buff *skb = pkt->skb;
+ union nf_inet_addr new_addr;
+ __be32 netmask;
+ int i, len = 0;
+
+ switch (priv->type) {
+ case NFT_NAT_SNAT:
+ if (nft_pf(pkt) == NFPROTO_IPV4) {
+ new_addr.ip = ip_hdr(skb)->saddr;
+ len = sizeof(struct in_addr);
+ } else {
+ new_addr.in6 = ipv6_hdr(skb)->saddr;
+ len = sizeof(struct in6_addr);
+ }
+ break;
+ case NFT_NAT_DNAT:
+ if (nft_pf(pkt) == NFPROTO_IPV4) {
+ new_addr.ip = ip_hdr(skb)->daddr;
+ len = sizeof(struct in_addr);
+ } else {
+ new_addr.in6 = ipv6_hdr(skb)->daddr;
+ len = sizeof(struct in6_addr);
+ }
+ break;
+ }
+
+ for (i = 0; i < len / sizeof(__be32); i++) {
+ netmask = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]);
+ new_addr.ip6[i] &= ~netmask;
+ new_addr.ip6[i] |= range->min_addr.ip6[i] & netmask;
+ }
+
+ range->min_addr = new_addr;
+ range->max_addr = new_addr;
+}
+
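+/*
+ * Build an nf_nat_range2 from the source registers (address range,
+ * optional NETMAP transform, optional proto range) and attach the
+ * binding to the conntrack entry via nf_nat_setup_info().
+ */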
+static void nft_nat_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+
+ if (priv->sreg_addr_min) {
+ nft_nat_setup_addr(&range, regs, priv);
+ if (priv->flags & NF_NAT_RANGE_NETMAP)
+ nft_nat_setup_netmap(&range, pkt, priv);
+ }
+
+ if (priv->sreg_proto_min)
+ nft_nat_setup_proto(&range, regs, priv);
+
+ range.flags = priv->flags;
+
+ regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
+}
+
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+ [NFTA_NAT_TYPE] = { .type = NLA_U32 },
+ [NFTA_NAT_FAMILY] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_nat_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct nft_nat *priv = nft_expr_priv(expr);
+ int err;
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ switch (priv->type) {
+ case NFT_NAT_SNAT:
+ err = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN));
+ break;
+ case NFT_NAT_DNAT:
+ err = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT));
+ break;
+ }
+
+ return err;
+}
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_nat *priv = nft_expr_priv(expr);
+ unsigned int alen, plen;
+ u32 family;
+ int err;
+
+ if (tb[NFTA_NAT_TYPE] == NULL ||
+ (tb[NFTA_NAT_REG_ADDR_MIN] == NULL &&
+ tb[NFTA_NAT_REG_PROTO_MIN] == NULL))
+ return -EINVAL;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+ case NFT_NAT_SNAT:
+ priv->type = NF_NAT_MANIP_SRC;
+ break;
+ case NFT_NAT_DNAT:
+ priv->type = NF_NAT_MANIP_DST;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_NAT_FAMILY] == NULL)
+ return -EINVAL;
+
+ family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+ if (ctx->family != NFPROTO_INET && ctx->family != family)
+ return -EOPNOTSUPP;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip);
+ break;
+ case NFPROTO_IPV6:
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
+ break;
+ default:
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
+ }
+ priv->family = family;
+
+ if (tb[NFTA_NAT_REG_ADDR_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ &priv->sreg_addr_min, alen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ &priv->sreg_addr_max,
+ alen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_addr_max = priv->sreg_addr_min;
+ }
+
+ priv->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+
+ priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ if (tb[NFTA_NAT_FLAGS]) {
+ priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EOPNOTSUPP;
+ }
+
+ return nf_ct_netns_get(ctx->net, family);
+}
+
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NF_NAT_MANIP_SRC:
+ if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+ goto nla_put_failure;
+ break;
+ case NF_NAT_MANIP_DST:
+ if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
+ goto nla_put_failure;
+
+ if (priv->sreg_addr_min) {
+ if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN,
+ priv->sreg_addr_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX,
+ priv->sreg_addr_max))
+ goto nla_put_failure;
+ }
+
+ if (priv->sreg_proto_min) {
+ if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX,
+ priv->sreg_proto_max))
+ goto nla_put_failure;
+ }
+
+ if (priv->flags != 0) {
+ if (nla_put_be32(skb, NFTA_NAT_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void
+nft_nat_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+ nf_ct_netns_put(ctx->net, priv->family);
+}
+
+static struct nft_expr_type nft_nat_type;
+static const struct nft_expr_ops nft_nat_ops = {
+ .type = &nft_nat_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+ .eval = nft_nat_eval,
+ .init = nft_nat_init,
+ .destroy = nft_nat_destroy,
+ .dump = nft_nat_dump,
+ .validate = nft_nat_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_nat_type __read_mostly = {
+ .name = "nat",
+ .ops = &nft_nat_ops,
+ .policy = nft_nat_policy,
+ .maxattr = NFTA_NAT_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_INET
+static void nft_nat_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
+ nft_nat_eval(expr, regs, pkt);
+}
+
+static const struct nft_expr_ops nft_nat_inet_ops = {
+ .type = &nft_nat_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+ .eval = nft_nat_inet_eval,
+ .init = nft_nat_init,
+ .destroy = nft_nat_destroy,
+ .dump = nft_nat_dump,
+ .validate = nft_nat_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_inet_nat_type __read_mostly = {
+ .name = "nat",
+ .family = NFPROTO_INET,
+ .ops = &nft_nat_inet_ops,
+ .policy = nft_nat_policy,
+ .maxattr = NFTA_NAT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int nft_nat_inet_module_init(void)
+{
+ return nft_register_expr(&nft_inet_nat_type);
+}
+
+static void nft_nat_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_inet_nat_type);
+}
+#else
+static int nft_nat_inet_module_init(void) { return 0; }
+static void nft_nat_inet_module_exit(void) { }
+#endif
+
+static int __init nft_nat_module_init(void)
+{
+ int ret = nft_nat_inet_module_init();
+
+ if (ret)
+ return ret;
+
+ ret = nft_register_expr(&nft_nat_type);
+ if (ret)
+ nft_nat_inet_module_exit();
+
+ return ret;
+}
+
+static void __exit nft_nat_module_exit(void)
+{
+ nft_nat_inet_module_exit();
+ nft_unregister_expr(&nft_nat_type);
+}
+
+module_init(nft_nat_module_init);
+module_exit(nft_nat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_DESCRIPTION("Network Address Translation support");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 000000000..45d3dc9e9
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_ng_inc {
+ u8 dreg;
+ u32 modulus;
+ atomic_t *counter;
+ u32 offset;
+};
+
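+/*
+ * Lock-free incremental generator: a cmpxchg retry loop advances the
+ * shared counter modulo the configured modulus; the offset is added
+ * only to the value handed back to the rule.
+ */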
+static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
+{
+ u32 nval, oval;
+
+ do {
+ oval = atomic_read(priv->counter);
+ nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
+ } while (atomic_cmpxchg(priv->counter, oval, nval) != oval);
+
+ return nval + priv->offset;
+}
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ regs->data[priv->dreg] = nft_ng_inc_gen(priv);
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+ [NFTA_NG_DREG] = { .type = NLA_U32 },
+ [NFTA_NG_MODULUS] = { .type = NLA_U32 },
+ [NFTA_NG_TYPE] = { .type = NLA_U32 },
+ [NFTA_NG_OFFSET] = { .type = NLA_U32 },
+};
+
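+/*
+ * The offset is applied after the modulo, so the largest value ever
+ * emitted is offset + modulus - 1; reject parameters where that sum
+ * would wrap around u32.
+ */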
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_NG_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+ if (priv->modulus == 0)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
+ if (!priv->counter)
+ return -ENOMEM;
+
+ atomic_set(priv->counter, priv->modulus - 1);
+
+ err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+ if (err < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(priv->counter);
+
+ return err;
+}
+
+static bool nft_ng_inc_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
+
+ return false;
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+ u32 modulus, enum nft_ng_types type, u32 offset)
+{
+ if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL,
+ priv->offset);
+}
+
+static void nft_ng_inc_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ kfree(priv->counter);
+}
+
+struct nft_ng_random {
+ u8 dreg;
+ u32 modulus;
+ u32 offset;
+};
+
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
+{
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
+}
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ regs->data[priv->dreg] = nft_ng_random_gen(priv);
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_NG_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+ if (priv->modulus == 0)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM,
+ priv->offset);
+}
+
+static bool nft_ng_random_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
+
+ return false;
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+ .eval = nft_ng_inc_eval,
+ .init = nft_ng_inc_init,
+ .destroy = nft_ng_inc_destroy,
+ .dump = nft_ng_inc_dump,
+ .reduce = nft_ng_inc_reduce,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+ .eval = nft_ng_random_eval,
+ .init = nft_ng_random_init,
+ .dump = nft_ng_random_dump,
+ .reduce = nft_ng_random_reduce,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_NG_DREG] ||
+ !tb[NFTA_NG_MODULUS] ||
+ !tb[NFTA_NG_TYPE])
+ return ERR_PTR(-EINVAL);
+
+ type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+ switch (type) {
+ case NFT_NG_INCREMENTAL:
+ return &nft_ng_inc_ops;
+ case NFT_NG_RANDOM:
+ return &nft_ng_random_ops;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+ .name = "numgen",
+ .select_ops = nft_ng_select_ops,
+ .policy = nft_ng_policy,
+ .maxattr = NFTA_NG_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+ return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+ nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
+MODULE_DESCRIPTION("nftables number generator module");
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
new file mode 100644
index 000000000..0017bd341
--- /dev/null
+++ b/net/netfilter/nft_objref.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
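+/* The expression's private area stores a single pointer to the referenced
+ * stateful object; this macro accesses it as an lvalue.
+ */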
+#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
+
+static void nft_objref_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->ops->eval(obj, regs, pkt);
+}
+
+static int nft_objref_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ u32 objtype;
+
+ if (!tb[NFTA_OBJREF_IMM_NAME] ||
+ !tb[NFTA_OBJREF_IMM_TYPE])
+ return -EINVAL;
+
+ objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
+ obj = nft_obj_lookup(ctx->net, ctx->table,
+ tb[NFTA_OBJREF_IMM_NAME], objtype,
+ genmask);
+ if (IS_ERR(obj))
+ return -ENOENT;
+
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
+ nft_objref_priv(expr) = obj;
+
+ return 0;
+}
+
+static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_object *obj = nft_objref_priv(expr);
+
+ if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) ||
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+ htonl(obj->ops->type->type)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
+ nft_use_dec(&obj->use);
+}
+
+static void nft_objref_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ nft_use_inc_restore(&obj->use);
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct nft_expr_ops nft_objref_ops = {
+ .type = &nft_objref_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
+ .eval = nft_objref_eval,
+ .init = nft_objref_init,
+ .activate = nft_objref_activate,
+ .deactivate = nft_objref_deactivate,
+ .dump = nft_objref_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+struct nft_objref_map {
+ struct nft_set *set;
+ u8 sreg;
+ struct nft_set_binding binding;
+};
+
+static void nft_objref_map_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+ const struct nft_set *set = priv->set;
+ struct net *net = nft_net(pkt);
+ const struct nft_set_ext *ext;
+ struct nft_object *obj;
+ bool found;
+
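+	/* Fall back to the set's catch-all element if no key matched;
+	 * without one, NFT_BREAK stops evaluating this rule.
+	 */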
+ found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
+ if (!found) {
+ ext = nft_set_catchall_lookup(net, set);
+ if (!ext) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ }
+ obj = *nft_set_ext_obj(ext);
+ obj->ops->eval(obj, regs, pkt);
+}
+
+static int nft_objref_map_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set *set;
+ int err;
+
+ set = nft_set_lookup_global(ctx->net, ctx->table,
+ tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (!(set->flags & NFT_SET_OBJECT))
+ return -EINVAL;
+
+ err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ set->klen);
+ if (err < 0)
+ return err;
+
+ priv->binding.flags = set->flags & NFT_SET_OBJECT;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ return err;
+
+ priv->set = set;
+ return 0;
+}
+
+static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) ||
+ nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ nf_tables_activate_set(ctx, priv->set);
+}
+
+static void nft_objref_map_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+ nf_tables_destroy_set(ctx, priv->set);
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct nft_expr_ops nft_objref_map_ops = {
+ .type = &nft_objref_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
+ .eval = nft_objref_map_eval,
+ .init = nft_objref_map_init,
+ .activate = nft_objref_map_activate,
+ .deactivate = nft_objref_map_deactivate,
+ .destroy = nft_objref_map_destroy,
+ .dump = nft_objref_map_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops *
+nft_objref_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_OBJREF_SET_SREG] &&
+ (tb[NFTA_OBJREF_SET_NAME] ||
+ tb[NFTA_OBJREF_SET_ID]))
+ return &nft_objref_map_ops;
+ else if (tb[NFTA_OBJREF_IMM_NAME] &&
+ tb[NFTA_OBJREF_IMM_TYPE])
+ return &nft_objref_ops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = {
+ [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING,
+ .len = NFT_OBJ_MAXNAMELEN - 1 },
+ [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 },
+ [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 },
+ [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING,
+ .len = NFT_SET_MAXNAMELEN - 1 },
+ [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 },
+};
+
+static struct nft_expr_type nft_objref_type __read_mostly = {
+ .name = "objref",
+ .select_ops = nft_objref_select_ops,
+ .policy = nft_objref_policy,
+ .maxattr = NFTA_OBJREF_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_objref_module_init(void)
+{
+ return nft_register_expr(&nft_objref_type);
+}
+
+static void __exit nft_objref_module_exit(void)
+{
+ nft_unregister_expr(&nft_objref_type);
+}
+
+module_init(nft_objref_module_init);
+module_exit(nft_objref_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("objref");
+MODULE_DESCRIPTION("nftables stateful object reference module");
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
new file mode 100644
index 000000000..adacf95b6
--- /dev/null
+++ b/net/netfilter/nft_osf.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <linux/netfilter/nfnetlink_osf.h>
+
+struct nft_osf {
+ u8 dreg;
+ u8 ttl;
+ u32 flags;
+};
+
+static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
+ [NFTA_OSF_DREG] = { .type = NLA_U32 },
+ [NFTA_OSF_TTL] = { .type = NLA_U8 },
+ [NFTA_OSF_FLAGS] = { .type = NLA_U32 },
+};
+
+static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_osf *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ struct sk_buff *skb = pkt->skb;
+ char os_match[NFT_OSF_MAXGENRELEN + 1];
+ const struct tcphdr *tcp;
+ struct nf_osf_data data;
+ struct tcphdr _tcph;
+
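+	/* Passive fingerprinting needs the TCP header of a SYN packet;
+	 * break out of rule evaluation for anything else.
+	 */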
+ if (pkt->tprot != IPPROTO_TCP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ tcp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(struct tcphdr), &_tcph);
+ if (!tcp) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ if (!tcp->syn) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
+ strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
+ } else {
+ if (priv->flags & NFT_OSF_F_VERSION)
+ snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
+ data.genre, data.version);
+ else
+ strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
+
+ strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ }
+}
+
+static int nft_osf_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_osf *priv = nft_expr_priv(expr);
+ u32 flags;
+ int err;
+ u8 ttl;
+
+ if (!tb[NFTA_OSF_DREG])
+ return -EINVAL;
+
+ if (tb[NFTA_OSF_TTL]) {
+ ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
+ if (ttl > 2)
+ return -EINVAL;
+ priv->ttl = ttl;
+ }
+
+ if (tb[NFTA_OSF_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS]));
+ if (flags != NFT_OSF_F_VERSION)
+ return -EINVAL;
+ priv->flags = flags;
+ }
+
+ err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_osf *priv = nft_expr_priv(expr);
+
+ if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, NFTA_OSF_FLAGS, ntohl((__force __be32)priv->flags)))
+ goto nla_put_failure;
+
+ if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_osf_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static bool nft_osf_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_osf *priv = nft_expr_priv(expr);
+ struct nft_osf *osf;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN);
+ return false;
+ }
+
+ osf = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->flags != osf->flags ||
+ priv->ttl != osf->ttl) {
+ nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+
+static struct nft_expr_type nft_osf_type;
+static const struct nft_expr_ops nft_osf_op = {
+ .eval = nft_osf_eval,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_osf)),
+ .init = nft_osf_init,
+ .dump = nft_osf_dump,
+ .type = &nft_osf_type,
+ .validate = nft_osf_validate,
+ .reduce = nft_osf_reduce,
+};
+
+static struct nft_expr_type nft_osf_type __read_mostly = {
+ .ops = &nft_osf_op,
+ .name = "osf",
+ .owner = THIS_MODULE,
+ .policy = nft_osf_policy,
+ .maxattr = NFTA_OSF_MAX,
+};
+
+static int __init nft_osf_module_init(void)
+{
+ return nft_register_expr(&nft_osf_type);
+}
+
+static void __exit nft_osf_module_exit(void)
+{
+	nft_unregister_expr(&nft_osf_type);
+}
+
+module_init(nft_osf_module_init);
+module_exit(nft_osf_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
+MODULE_ALIAS_NFT_EXPR("osf");
+MODULE_DESCRIPTION("nftables passive OS fingerprint support");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 000000000..f44f2eaf3
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,891 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/sctp/checksum.h>
+
+static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
+ struct vlan_ethhdr *veth)
+{
+ if (skb_copy_bits(skb, mac_off, veth, ETH_HLEN))
+ return false;
+
+ veth->h_vlan_proto = skb->vlan_proto;
+ veth->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth->h_vlan_encapsulated_proto = skb->protocol;
+
+ return true;
+}
+
+/* Add the VLAN header back into the user buffer if the tag was stripped by offloads. */
+static bool
+nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+{
+ int mac_off = skb_mac_header(skb) - skb->data;
+ u8 *vlanh, *dst_u8 = (u8 *) d;
+ struct vlan_ethhdr veth;
+ u8 vlan_hlen = 0;
+
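+	/* If the frame carries a second, still-inline VLAN tag and the
+	 * requested bytes reach into it, widen the rebuilt header region.
+	 */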
+ if ((skb->protocol == htons(ETH_P_8021AD) ||
+ skb->protocol == htons(ETH_P_8021Q)) &&
+ offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
+ vlan_hlen += VLAN_HLEN;
+
+ vlanh = (u8 *) &veth;
+ if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ u8 ethlen = len;
+
+ if (vlan_hlen &&
+ skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
+ return false;
+ else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+ return false;
+
+ if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+
+ memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+
+ len -= ethlen;
+ if (len == 0)
+ return true;
+
+ dst_u8 += ethlen;
+ offset = ETH_HLEN + vlan_hlen;
+ } else {
+ offset -= VLAN_HLEN + vlan_hlen;
+ }
+
+ return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
+}
+
+static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
+{
+ unsigned int thoff = nft_thoff(pkt);
+
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
+ return -1;
+
+ switch (pkt->tprot) {
+ case IPPROTO_UDP:
+ pkt->inneroff = thoff + sizeof(struct udphdr);
+ break;
+ case IPPROTO_TCP: {
+ struct tcphdr *th, _tcph;
+
+ th = skb_header_pointer(pkt->skb, thoff, sizeof(_tcph), &_tcph);
+ if (!th)
+ return -1;
+
+ pkt->inneroff = thoff + __tcp_hdrlen(th);
+ }
+ break;
+ default:
+ return -1;
+ }
+
+ pkt->flags |= NFT_PKTINFO_INNER;
+
+ return 0;
+}
+
+static int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
+{
+ if (!(pkt->flags & NFT_PKTINFO_INNER) &&
+ __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0)
+ return -1;
+
+ return pkt->inneroff;
+}
+
+void nft_payload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ int offset;
+
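+	/* Zero the last destination register when the length is not a
+	 * multiple of 32 bits, so no stale register bits leak through.
+	 */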
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0)
+ goto err;
+
+ if (skb_vlan_tag_present(skb)) {
+ if (!nft_payload_copy_vlan(dest, skb,
+ priv->offset, priv->len))
+ goto err;
+ return;
+ }
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
+ goto err;
+ offset = nft_thoff(pkt);
+ break;
+ case NFT_PAYLOAD_INNER_HEADER:
+ offset = nft_payload_inner_offset(pkt);
+ if (offset < 0)
+ goto err;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ goto err;
+ }
+ offset += priv->offset;
+
+ if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
+ goto err;
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+ [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_payload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload *priv = nft_expr_priv(expr);
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+ return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
+}
+
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_payload_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct nft_payload *payload;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ payload = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->base != payload->base ||
+ priv->offset != payload->offset ||
+ priv->len != payload->len) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
+ u32 priv_len, u32 field_len)
+{
+ unsigned int remainder, delta, k;
+ struct nft_data mask = {};
+ __be32 remainder_mask;
+
+ if (priv_len == field_len) {
+ memset(&reg->mask, 0xff, priv_len);
+ return true;
+ } else if (priv_len > field_len) {
+ return false;
+ }
+
+ memset(&mask, 0xff, field_len);
+ remainder = priv_len % sizeof(u32);
+ if (remainder) {
+ k = priv_len / sizeof(u32);
+ delta = field_len - priv_len;
+ remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1));
+ mask.data[k] = (__force u32)remainder_mask;
+ }
+
+ memcpy(&reg->mask, &mask, field_len);
+
+ return true;
+}
+
+static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->offset) {
+ case offsetof(struct ethhdr, h_source):
+ if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+ src, ETH_ALEN, reg);
+ break;
+ case offsetof(struct ethhdr, h_dest):
+ if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+ dst, ETH_ALEN, reg);
+ break;
+ case offsetof(struct ethhdr, h_proto):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic,
+ n_proto, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tci, sizeof(__be16), reg,
+ NFT_OFFLOAD_F_NETWORK2HOST);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tpid, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
+ vlan_tci, sizeof(__be16), reg,
+ NFT_OFFLOAD_F_NETWORK2HOST);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
+ sizeof(struct vlan_hdr):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
+ vlan_tpid, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->offset) {
+ case offsetof(struct iphdr, saddr):
+ if (!nft_payload_offload_mask(reg, priv->len,
+ sizeof(struct in_addr)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
+ sizeof(struct in_addr), reg);
+ nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ break;
+ case offsetof(struct iphdr, daddr):
+ if (!nft_payload_offload_mask(reg, priv->len,
+ sizeof(struct in_addr)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
+ sizeof(struct in_addr), reg);
+ nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ break;
+ case offsetof(struct iphdr, protocol):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+ sizeof(__u8), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->offset) {
+ case offsetof(struct ipv6hdr, saddr):
+ if (!nft_payload_offload_mask(reg, priv->len,
+ sizeof(struct in6_addr)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
+ sizeof(struct in6_addr), reg);
+ nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ break;
+ case offsetof(struct ipv6hdr, daddr):
+ if (!nft_payload_offload_mask(reg, priv->len,
+ sizeof(struct in6_addr)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
+ sizeof(struct in6_addr), reg);
+ nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ break;
+ case offsetof(struct ipv6hdr, nexthdr):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+ sizeof(__u8), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_payload_offload_nh(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ int err;
+
+ switch (ctx->dep.l3num) {
+ case htons(ETH_P_IP):
+ err = nft_payload_offload_ip(ctx, flow, priv);
+ break;
+ case htons(ETH_P_IPV6):
+ err = nft_payload_offload_ip6(ctx, flow, priv);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->offset) {
+ case offsetof(struct tcphdr, source):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+ sizeof(__be16), reg);
+ break;
+ case offsetof(struct tcphdr, dest):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+ sizeof(__be16), reg);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+ switch (priv->offset) {
+ case offsetof(struct udphdr, source):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+ sizeof(__be16), reg);
+ break;
+ case offsetof(struct udphdr, dest):
+ if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+ sizeof(__be16), reg);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_payload_offload_th(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_payload *priv)
+{
+ int err;
+
+ switch (ctx->dep.protonum) {
+ case IPPROTO_TCP:
+ err = nft_payload_offload_tcp(ctx, flow, priv);
+ break;
+ case IPPROTO_UDP:
+ err = nft_payload_offload_udp(ctx, flow, priv);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int nft_payload_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ int err;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ err = nft_payload_offload_ll(ctx, flow, priv);
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ err = nft_payload_offload_nh(ctx, flow, priv);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ err = nft_payload_offload_th(ctx, flow, priv);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ return err;
+}
+
+static const struct nft_expr_ops nft_payload_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+ .eval = nft_payload_eval,
+ .init = nft_payload_init,
+ .dump = nft_payload_dump,
+ .reduce = nft_payload_reduce,
+ .offload = nft_payload_offload,
+};
+
+const struct nft_expr_ops nft_payload_fast_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+ .eval = nft_payload_eval,
+ .init = nft_payload_init,
+ .dump = nft_payload_dump,
+ .reduce = nft_payload_reduce,
+ .offload = nft_payload_offload,
+};
+
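+/* Fold the updated checksum, mapping a result of 0 to CSUM_MANGLED_0
+ * (0xffff, equivalent in ones' complement), since 0 means "no checksum"
+ * for UDP.
+ */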
+static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
+{
+ *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
+ if (*sum == 0)
+ *sum = CSUM_MANGLED_0;
+}
+
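+/* A zero UDP checksum means the sender did not compute one, so there is
+ * nothing to update; report that to the caller.
+ */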
+static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
+{
+ struct udphdr *uh, _uh;
+
+ uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh);
+ if (!uh)
+ return false;
+
+ return (__force bool)uh->check;
+}
+
+static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ unsigned int *l4csum_offset)
+{
+ if (pkt->fragoff)
+ return -1;
+
+ switch (pkt->tprot) {
+ case IPPROTO_TCP:
+ *l4csum_offset = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ if (!nft_payload_udp_checksum(skb, nft_thoff(pkt)))
+ return -1;
+ fallthrough;
+ case IPPROTO_UDPLITE:
+ *l4csum_offset = offsetof(struct udphdr, check);
+ break;
+ case IPPROTO_ICMPV6:
+ *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
+ break;
+ default:
+ return -1;
+ }
+
+ *l4csum_offset += nft_thoff(pkt);
+ return 0;
+}
+
+static int nft_payload_csum_sctp(struct sk_buff *skb, int offset)
+{
+ struct sctphdr *sh;
+
+ if (skb_ensure_writable(skb, offset + sizeof(*sh)))
+ return -1;
+
+ sh = (struct sctphdr *)(skb->data + offset);
+ sh->checksum = sctp_compute_cksum(skb, offset);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ return 0;
+}
+
+static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ __wsum fsum, __wsum tsum)
+{
+ int l4csum_offset;
+ __sum16 sum;
+
+	/* If we cannot determine the layer 4 checksum offset, or this packet
+	 * doesn't require layer 4 checksum recalculation, skip it.
+	 */
+ if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0)
+ return 0;
+
+ if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+	/* Checksum mangling for an arbitrary number of bytes, based on
+	 * the inet_proto_csum_replace*() functions.
+	 */
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ nft_csum_replace(&sum, fsum, tsum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum),
+ tsum);
+ }
+ } else {
+ sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum),
+ tsum));
+ }
+
+ if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
+ __wsum fsum, __wsum tsum, int csum_offset)
+{
+ __sum16 sum;
+
+ if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ nft_csum_replace(&sum, fsum, tsum);
+ if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void nft_payload_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const u32 *src = &regs->data[priv->sreg];
+ int offset, csum_offset;
+ __wsum fsum, tsum;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb))
+ goto err;
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
+ goto err;
+ offset = nft_thoff(pkt);
+ break;
+ case NFT_PAYLOAD_INNER_HEADER:
+ offset = nft_payload_inner_offset(pkt);
+ if (offset < 0)
+ goto err;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ goto err;
+ }
+
+ csum_offset = offset + priv->csum_offset;
+ offset += priv->offset;
+
+ if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) &&
+ ((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER &&
+ priv->base != NFT_PAYLOAD_INNER_HEADER) ||
+ skb->ip_summed != CHECKSUM_PARTIAL)) {
+ fsum = skb_checksum(skb, offset, priv->len, 0);
+ tsum = csum_partial(src, priv->len, 0);
+
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ nft_payload_csum_inet(skb, src, fsum, tsum, csum_offset))
+ goto err;
+
+ if (priv->csum_flags &&
+ nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0)
+ goto err;
+ }
+
+ if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
+ skb_store_bits(skb, offset, src, priv->len) < 0)
+ goto err;
+
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
+ pkt->tprot == IPPROTO_SCTP &&
+ skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (pkt->fragoff == 0 &&
+ nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+ goto err;
+ }
+
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_payload_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+ if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
+ if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
+ u32 flags;
+
+ flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS]));
+ if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR)
+ return -EINVAL;
+
+ priv->csum_flags = flags;
+ }
+
+ switch (csum_type) {
+ case NFT_PAYLOAD_CSUM_NONE:
+ case NFT_PAYLOAD_CSUM_INET:
+ break;
+ case NFT_PAYLOAD_CSUM_SCTP:
+ if (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER)
+ return -EINVAL;
+
+ if (priv->csum_offset != offsetof(struct sctphdr, checksum))
+ return -EINVAL;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ priv->csum_type = csum_type;
+
+ return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ priv->len);
+}
+
+static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
+ htonl(priv->csum_offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_payload_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_payload_ops &&
+ track->regs[i].selector->ops != &nft_payload_fast_ops)
+ continue;
+
+ __nft_reg_track_cancel(track, i);
+ }
+
+ return false;
+}
+
+static const struct nft_expr_ops nft_payload_set_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
+ .eval = nft_payload_set_eval,
+ .init = nft_payload_set_init,
+ .dump = nft_payload_set_dump,
+ .reduce = nft_payload_set_reduce,
+};
+
+static const struct nft_expr_ops *
+nft_payload_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_payload_bases base;
+ unsigned int offset, len;
+ int err;
+
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
+ tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+ tb[NFTA_PAYLOAD_LEN] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ switch (base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ case NFT_PAYLOAD_INNER_HEADER:
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (tb[NFTA_PAYLOAD_SREG] != NULL) {
+ if (tb[NFTA_PAYLOAD_DREG] != NULL)
+ return ERR_PTR(-EINVAL);
+ return &nft_payload_set_ops;
+ }
+
+ if (tb[NFTA_PAYLOAD_DREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
+ base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
+ return &nft_payload_fast_ops;
+ else
+ return &nft_payload_ops;
+}
+
+struct nft_expr_type nft_payload_type __read_mostly = {
+ .name = "payload",
+ .select_ops = nft_payload_select_ops,
+ .policy = nft_payload_policy,
+ .maxattr = NFTA_PAYLOAD_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
new file mode 100644
index 000000000..da29e92c0
--- /dev/null
+++ b/net/netfilter/nft_queue.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * Development of this code partly funded by OISF
+ * (http://www.openinfosecfoundation.org/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_queue.h>
+
+static u32 jhash_initval __read_mostly;
+
+struct nft_queue {
+ u8 sreg_qnum;
+ u16 queuenum;
+ u16 queues_total;
+ u16 flags;
+};
+
+static void nft_queue_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ u32 queue = priv->queuenum;
+ u32 ret;
+
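+	/* Spread packets over the configured queue range: either pin by
+	 * CPU id or hash the flow so one flow always hits the same queue.
+	 */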
+ if (priv->queues_total > 1) {
+ if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
+ int cpu = raw_smp_processor_id();
+
+ queue = priv->queuenum + cpu % priv->queues_total;
+ } else {
+ queue = nfqueue_hash(pkt->skb, queue,
+ priv->queues_total, nft_pf(pkt),
+ jhash_initval);
+ }
+ }
+
+ ret = NF_QUEUE_NR(queue);
+ if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+ regs->verdict.code = ret;
+}
+
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ u32 queue, ret;
+
+ queue = regs->data[priv->sreg_qnum];
+
+ ret = NF_QUEUE_NR(queue);
+ if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+ regs->verdict.code = ret;
+}
+
+static int nft_queue_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ case NFPROTO_BRIDGE:
+ break;
+ case NFPROTO_NETDEV: /* lacks okfn */
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
+static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
+ [NFTA_QUEUE_NUM] = { .type = NLA_U16 },
+ [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
+ [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 },
+ [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 },
+};
+
+static int nft_queue_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ u32 maxid;
+
+ priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
+
+ if (tb[NFTA_QUEUE_TOTAL])
+ priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
+ else
+ priv->queues_total = 1;
+
+ if (priv->queues_total == 0)
+ return -EINVAL;
+
+ maxid = priv->queues_total - 1 + priv->queuenum;
+ if (maxid > U16_MAX)
+ return -ERANGE;
+
+ if (tb[NFTA_QUEUE_FLAGS]) {
+ priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+ if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int nft_queue_sreg_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ &priv->sreg_qnum, sizeof(u32));
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_QUEUE_FLAGS]) {
+ priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+ if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+ return -EINVAL;
+ if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_queue *priv = nft_expr_priv(expr);
+
+ if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) ||
+ nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) ||
+ nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int
+nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_queue *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) ||
+ nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_queue_type;
+static const struct nft_expr_ops nft_queue_ops = {
+ .type = &nft_queue_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+ .eval = nft_queue_eval,
+ .init = nft_queue_init,
+ .dump = nft_queue_dump,
+ .validate = nft_queue_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops nft_queue_sreg_ops = {
+ .type = &nft_queue_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+ .eval = nft_queue_sreg_eval,
+ .init = nft_queue_sreg_init,
+ .dump = nft_queue_sreg_dump,
+ .validate = nft_queue_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops *
+nft_queue_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM])
+ return ERR_PTR(-EINVAL);
+
+ init_hashrandom(&jhash_initval);
+
+ if (tb[NFTA_QUEUE_NUM])
+ return &nft_queue_ops;
+
+ if (tb[NFTA_QUEUE_SREG_QNUM])
+ return &nft_queue_sreg_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_queue_type __read_mostly = {
+ .name = "queue",
+ .select_ops = nft_queue_select_ops,
+ .policy = nft_queue_policy,
+ .maxattr = NFTA_QUEUE_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_queue_module_init(void)
+{
+ return nft_register_expr(&nft_queue_type);
+}
+
+static void __exit nft_queue_module_exit(void)
+{
+ nft_unregister_expr(&nft_queue_type);
+}
+
+module_init(nft_queue_module_init);
+module_exit(nft_queue_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Leblond <eric@regit.org>");
+MODULE_ALIAS_NFT_EXPR("queue");
+MODULE_DESCRIPTION("Netfilter nftables queue module");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 000000000..410a5fcf8
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+ atomic64_t quota;
+ unsigned long flags;
+ atomic64_t *consumed;
+};
+
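+/* Account the packet length first, then compare: the packet that crosses
+ * the limit is itself counted against the quota.
+ */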
+static inline bool nft_overquota(struct nft_quota *priv,
+ const struct sk_buff *skb)
+{
+ return atomic64_add_return(skb->len, priv->consumed) >=
+ atomic64_read(&priv->quota);
+}
+
+static inline bool nft_quota_invert(struct nft_quota *priv)
+{
+ return priv->flags & NFT_QUOTA_F_INV;
+}
+
+static inline void nft_quota_do_eval(struct nft_quota *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+ [NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
+ [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
+ [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 },
+};
+
+#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */
+
+static void nft_quota_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+ bool overquota;
+
+ overquota = nft_overquota(priv, pkt->skb);
+ if (overquota ^ nft_quota_invert(priv))
+ regs->verdict.code = NFT_BREAK;
+
+ if (overquota &&
+ !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
+ nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
+ NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
+}
+
+static int nft_quota_do_init(const struct nlattr * const tb[],
+ struct nft_quota *priv)
+{
+ unsigned long flags = 0;
+ u64 quota, consumed = 0;
+
+ if (!tb[NFTA_QUOTA_BYTES])
+ return -EINVAL;
+
+ quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+ if (quota > S64_MAX)
+ return -EOVERFLOW;
+
+ if (tb[NFTA_QUOTA_CONSUMED]) {
+ consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED]));
+ if (consumed > quota)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_QUOTA_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+ if (flags & ~NFT_QUOTA_F_INV)
+ return -EINVAL;
+ if (flags & NFT_QUOTA_F_DEPLETED)
+ return -EOPNOTSUPP;
+ }
+
+ priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
+ if (!priv->consumed)
+ return -ENOMEM;
+
+ atomic64_set(&priv->quota, quota);
+ priv->flags = flags;
+ atomic64_set(priv->consumed, consumed);
+
+ return 0;
+}
+
+static void nft_quota_do_destroy(const struct nft_ctx *ctx,
+ struct nft_quota *priv)
+{
+ kfree(priv->consumed);
+}
+
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+ return nft_quota_do_init(tb, priv);
+}
+
+static void nft_quota_obj_update(struct nft_object *obj,
+ struct nft_object *newobj)
+{
+ struct nft_quota *newpriv = nft_obj_data(newobj);
+ struct nft_quota *priv = nft_obj_data(obj);
+ u64 newquota;
+
+ newquota = atomic64_read(&newpriv->quota);
+ atomic64_set(&priv->quota, newquota);
+ priv->flags = newpriv->flags;
+}
+
+static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
+ bool reset)
+{
+ u64 consumed, consumed_cap, quota;
+ u32 flags = priv->flags;
+
+	/* Since we unconditionally increment the consumed quota for each
+	 * packet that we see, don't go over the quota boundary in what we
+	 * report to userspace.
+	 */
+ consumed = atomic64_read(priv->consumed);
+ quota = atomic64_read(&priv->quota);
+ if (consumed >= quota) {
+ consumed_cap = quota;
+ flags |= NFT_QUOTA_F_DEPLETED;
+ } else {
+ consumed_cap = consumed;
+ }
+
+ if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(quota),
+ NFTA_QUOTA_PAD) ||
+ nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap),
+ NFTA_QUOTA_PAD) ||
+ nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+
+ if (reset) {
+ atomic64_sub(consumed, priv->consumed);
+ clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
+ }
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+ bool reset)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+ return nft_quota_do_dump(skb, priv, reset);
+}
+
+static void nft_quota_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+	nft_quota_do_destroy(ctx, priv);
+}
+
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+ .type = &nft_quota_obj_type,
+ .size = sizeof(struct nft_quota),
+ .init = nft_quota_obj_init,
+ .destroy = nft_quota_obj_destroy,
+ .eval = nft_quota_obj_eval,
+ .dump = nft_quota_obj_dump,
+ .update = nft_quota_obj_update,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .ops = &nft_quota_obj_ops,
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
+ .owner = THIS_MODULE,
+};
+
+static void nft_quota_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ nft_quota_do_eval(priv, regs, pkt);
+}
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ return nft_quota_do_init(tb, priv);
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ return nft_quota_do_dump(skb, priv, false);
+}
+
+static void nft_quota_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+	nft_quota_do_destroy(ctx, priv);
+}
+
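+/* Called when a stateful expression is duplicated, e.g. for dynamic set
+ * elements: each clone gets its own consumed counter, copied from the
+ * source.
+ */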
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_quota *priv_dst = nft_expr_priv(dst);
+ struct nft_quota *priv_src = nft_expr_priv(src);
+
+ priv_dst->quota = priv_src->quota;
+ priv_dst->flags = priv_src->flags;
+
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ if (!priv_dst->consumed)
+ return -ENOMEM;
+
+ *priv_dst->consumed = *priv_src->consumed;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+ .type = &nft_quota_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+ .eval = nft_quota_eval,
+ .init = nft_quota_init,
+ .destroy = nft_quota_destroy,
+ .clone = nft_quota_clone,
+ .dump = nft_quota_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+ .name = "quota",
+ .ops = &nft_quota_ops,
+ .policy = nft_quota_policy,
+ .maxattr = NFTA_QUOTA_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+ int err;
+
+ err = nft_register_obj(&nft_quota_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_quota_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_quota_obj_type);
+ return err;
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+ nft_unregister_expr(&nft_quota_type);
+ nft_unregister_obj(&nft_quota_obj_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA);
+MODULE_DESCRIPTION("Netfilter nftables quota module");
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
new file mode 100644
index 000000000..832f0d725
--- /dev/null
+++ b/net/netfilter/nft_range.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_range_expr {
+ struct nft_data data_from;
+ struct nft_data data_to;
+ u8 sreg;
+ u8 len;
+ enum nft_range_ops op:8;
+};
+
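+/* The bounds and the source register are compared as byte strings, so
+ * memcmp() yields a lexicographic comparison of data stored in network
+ * byte order.
+ */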
+void nft_range_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ const struct nft_range_expr *priv = nft_expr_priv(expr);
+ int d1, d2;
+
+ d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
+ d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
+ switch (priv->op) {
+ case NFT_RANGE_EQ:
+ if (d1 < 0 || d2 > 0)
+ regs->verdict.code = NFT_BREAK;
+ break;
+ case NFT_RANGE_NEQ:
+ if (d1 >= 0 && d2 <= 0)
+ regs->verdict.code = NFT_BREAK;
+ break;
+ }
+}
+
+static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
+ [NFTA_RANGE_SREG] = { .type = NLA_U32 },
+ [NFTA_RANGE_OP] = { .type = NLA_U32 },
+ [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED },
+ [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_range_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc_from = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_from),
+ };
+ struct nft_data_desc desc_to = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_to),
+ };
+ int err;
+ u32 op;
+
+ if (!tb[NFTA_RANGE_SREG] ||
+ !tb[NFTA_RANGE_OP] ||
+ !tb[NFTA_RANGE_FROM_DATA] ||
+ !tb[NFTA_RANGE_TO_DATA])
+ return -EINVAL;
+
+ err = nft_data_init(NULL, &priv->data_from, &desc_from,
+ tb[NFTA_RANGE_FROM_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(NULL, &priv->data_to, &desc_to,
+ tb[NFTA_RANGE_TO_DATA]);
+ if (err < 0)
+ goto err1;
+
+ if (desc_from.len != desc_to.len) {
+ err = -EINVAL;
+ goto err2;
+ }
+
+ err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ desc_from.len);
+ if (err < 0)
+ goto err2;
+
+ err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op);
+ if (err < 0)
+ goto err2;
+
+ switch (op) {
+ case NFT_RANGE_EQ:
+ case NFT_RANGE_NEQ:
+ break;
+ default:
+ err = -EINVAL;
+ goto err2;
+ }
+
+ priv->op = op;
+ priv->len = desc_from.len;
+ return 0;
+err2:
+ nft_data_release(&priv->data_to, desc_to.type);
+err1:
+ nft_data_release(&priv->data_from, desc_from.type);
+ return err;
+}
+
+static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_range_expr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from,
+ NFT_DATA_VALUE, priv->len) < 0 ||
+ nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_range_ops = {
+ .type = &nft_range_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
+ .eval = nft_range_eval,
+ .init = nft_range_init,
+ .dump = nft_range_dump,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+struct nft_expr_type nft_range_type __read_mostly = {
+ .name = "range",
+ .ops = &nft_range_ops,
+ .policy = nft_range_policy,
+ .maxattr = NFTA_RANGE_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
new file mode 100644
index 000000000..08b408d3e
--- /dev/null
+++ b/net/netfilter/nft_redir.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_redir {
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
+ u16 flags;
+};
+
+static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
+ [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
+ [NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_redir_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ int err;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
+static int nft_redir_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_redir *priv = nft_expr_priv(expr);
+ unsigned int plen;
+ int err;
+
+ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+
+ priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ if (tb[NFTA_REDIR_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_redir *priv = nft_expr_priv(expr);
+
+ if (priv->sreg_proto_min) {
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN,
+ priv->sreg_proto_min))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX,
+ priv->sreg_proto_max))
+ goto nla_put_failure;
+ }
+
+ if (priv->flags != 0 &&
+ nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_redir_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_redir *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_max]);
+ }
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range,
+ nft_hook(pkt));
+ break;
+#ifdef CONFIG_NF_TABLES_IPV6
+ case NFPROTO_IPV6:
+ regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range,
+ nft_hook(pkt));
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_redir_ipv4_type;
+static const struct nft_expr_ops nft_redir_ipv4_ops = {
+ .type = &nft_redir_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_eval,
+ .init = nft_redir_init,
+ .destroy = nft_redir_ipv4_destroy,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "redir",
+ .ops = &nft_redir_ipv4_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_redir_ipv6_type;
+static const struct nft_expr_ops nft_redir_ipv6_ops = {
+ .type = &nft_redir_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_eval,
+ .init = nft_redir_init,
+ .destroy = nft_redir_ipv6_destroy,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "redir",
+ .ops = &nft_redir_ipv6_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_INET
+static void
+nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_INET);
+}
+
+static struct nft_expr_type nft_redir_inet_type;
+static const struct nft_expr_ops nft_redir_inet_ops = {
+ .type = &nft_redir_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_eval,
+ .init = nft_redir_init,
+ .destroy = nft_redir_inet_destroy,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_redir_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "redir",
+ .ops = &nft_redir_inet_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_redir_module_init_inet(void)
+{
+ return nft_register_expr(&nft_redir_inet_type);
+}
+#else
+static inline int nft_redir_module_init_inet(void) { return 0; }
+#endif
+
+static int __init nft_redir_module_init(void)
+{
+ int ret = nft_register_expr(&nft_redir_ipv4_type);
+
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_NF_TABLES_IPV6
+ ret = nft_register_expr(&nft_redir_ipv6_type);
+ if (ret) {
+ nft_unregister_expr(&nft_redir_ipv4_type);
+ return ret;
+ }
+#endif
+
+ ret = nft_redir_module_init_inet();
+ if (ret < 0) {
+ nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit nft_redir_module_exit(void)
+{
+ nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+#ifdef CONFIG_NF_TABLES_INET
+ nft_unregister_expr(&nft_redir_inet_type);
+#endif
+}
+
+module_init(nft_redir_module_init);
+module_exit(nft_redir_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_EXPR("redir");
+MODULE_DESCRIPTION("Netfilter nftables redirect support");
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
new file mode 100644
index 000000000..927ff8459
--- /dev/null
+++ b/net/netfilter/nft_reject.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+
+const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+ [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
+ [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
+};
+EXPORT_SYMBOL_GPL(nft_reject_policy);
+
+int nft_reject_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_PRE_ROUTING));
+}
+EXPORT_SYMBOL_GPL(nft_reject_validate);
+
+int nft_reject_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ int icmp_code;
+
+ if (tb[NFTA_REJECT_TYPE] == NULL)
+ return -EINVAL;
+
+ priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+ return -EINVAL;
+
+ icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+ if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+ icmp_code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ priv->icmp_code = icmp_code;
+ break;
+ case NFT_REJECT_TCP_RST:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_reject_init);
+
+int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+ goto nla_put_failure;
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nft_reject_dump);
+
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMP_PKT_FILTERED,
+};
+
+int nft_reject_icmp_code(u8 code)
+{
+ if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+ return ICMP_NET_UNREACH;
+
+ return icmp_code_v4[code];
+}
+EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
+
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMPV6_ADM_PROHIBITED,
+};
+
+int nft_reject_icmpv6_code(u8 code)
+{
+ if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+ return ICMPV6_NOROUTE;
+
+ return icmp_code_v6[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Netfilter nftables reject infrastructure");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
new file mode 100644
index 000000000..973fa31a9
--- /dev/null
+++ b/net/netfilter/nft_reject_inet.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+static void nft_reject_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ nft_hook(pkt));
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(nft_net(pkt), nft_sk(pkt),
+ pkt->skb, nft_hook(pkt));
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach(pkt->skb,
+ nft_reject_icmp_code(priv->icmp_code),
+ nft_hook(pkt));
+ break;
+ }
+ break;
+ case NFPROTO_IPV6:
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(nft_net(pkt), pkt->skb,
+ priv->icmp_code, nft_hook(pkt));
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(nft_net(pkt), nft_sk(pkt),
+ pkt->skb, nft_hook(pkt));
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach6(nft_net(pkt), pkt->skb,
+ nft_reject_icmpv6_code(priv->icmp_code),
+ nft_hook(pkt));
+ break;
+ }
+ break;
+ }
+
+ regs->verdict.code = NF_DROP;
+}
+
+static int nft_reject_inet_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_INGRESS));
+}
+
+static struct nft_expr_type nft_reject_inet_type;
+static const struct nft_expr_ops nft_reject_inet_ops = {
+ .type = &nft_reject_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_inet_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+ .validate = nft_reject_inet_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_reject_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "reject",
+ .ops = &nft_reject_inet_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_inet_module_init(void)
+{
+ return nft_register_expr(&nft_reject_inet_type);
+}
+
+static void __exit nft_reject_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_inet_type);
+}
+
+module_init(nft_reject_inet_module_init);
+module_exit(nft_reject_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
+MODULE_DESCRIPTION("Netfilter nftables reject inet support");
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
new file mode 100644
index 000000000..7865cd8b1
--- /dev/null
+++ b/net/netfilter/nft_reject_netdev.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Laura Garcia Liebana <nevola@gmail.com>
+ * Copyright (c) 2020 Jose M. Guisado <guigom@riseup.net>
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+static void nft_reject_queue_xmit(struct sk_buff *nskb, struct sk_buff *oldskb)
+{
+ dev_hard_header(nskb, nskb->dev, ntohs(oldskb->protocol),
+ eth_hdr(oldskb)->h_source, eth_hdr(oldskb)->h_dest,
+ nskb->len);
+ dev_queue_xmit(nskb);
+}
+
+static void nft_reject_netdev_send_v4_tcp_reset(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
+{
+ struct sk_buff *nskb;
+
+ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
+ if (!nskb)
+ return;
+
+ nft_reject_queue_xmit(nskb, oldskb);
+}
+
+static void nft_reject_netdev_send_v4_unreach(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
+{
+ struct sk_buff *nskb;
+
+ nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
+ if (!nskb)
+ return;
+
+ nft_reject_queue_xmit(nskb, oldskb);
+}
+
+static void nft_reject_netdev_send_v6_tcp_reset(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
+{
+ struct sk_buff *nskb;
+
+ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
+ if (!nskb)
+ return;
+
+ nft_reject_queue_xmit(nskb, oldskb);
+}
+
+static void nft_reject_netdev_send_v6_unreach(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
+{
+ struct sk_buff *nskb;
+
+ nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
+ if (!nskb)
+ return;
+
+ nft_reject_queue_xmit(nskb, oldskb);
+}
+
+static void nft_reject_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct ethhdr *eth = eth_hdr(pkt->skb);
+ struct nft_reject *priv = nft_expr_priv(expr);
+ const unsigned char *dest = eth->h_dest;
+
+ if (is_broadcast_ether_addr(dest) ||
+ is_multicast_ether_addr(dest))
+ goto out;
+
+ switch (eth->h_proto) {
+ case htons(ETH_P_IP):
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nft_reject_netdev_send_v4_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
+ priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nft_reject_netdev_send_v4_tcp_reset(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt));
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nft_reject_netdev_send_v4_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
+ nft_reject_icmp_code(priv->icmp_code));
+ break;
+ }
+ break;
+ case htons(ETH_P_IPV6):
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nft_reject_netdev_send_v6_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
+ priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nft_reject_netdev_send_v6_tcp_reset(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt));
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nft_reject_netdev_send_v6_unreach(nft_net(pkt), pkt->skb,
+ nft_in(pkt),
+ nft_hook(pkt),
+ nft_reject_icmpv6_code(priv->icmp_code));
+ break;
+ }
+ break;
+ default:
+ /* No explicit way to reject this protocol, drop it. */
+ break;
+ }
+out:
+ regs->verdict.code = NF_DROP;
+}
+
+static int nft_reject_netdev_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
+}
+
+static struct nft_expr_type nft_reject_netdev_type;
+static const struct nft_expr_ops nft_reject_netdev_ops = {
+ .type = &nft_reject_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_netdev_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+ .validate = nft_reject_netdev_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_reject_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "reject",
+ .ops = &nft_reject_netdev_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_reject_netdev_type);
+}
+
+static void __exit nft_reject_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_netdev_type);
+}
+
+module_init(nft_reject_netdev_module_init);
+module_exit(nft_reject_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia Liebana <nevola@gmail.com>");
+MODULE_AUTHOR("Jose M. Guisado <guigom@riseup.net>");
+MODULE_DESCRIPTION("Reject packets from netdev via nftables");
+MODULE_ALIAS_NFT_AF_EXPR(5, "reject");
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
new file mode 100644
index 000000000..7d21e1649
--- /dev/null
+++ b/net/netfilter/nft_rt.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_rt {
+ enum nft_rt_keys key:8;
+ u8 dreg;
+};
+
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+ minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+ minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ break;
+ }
+
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+ return TCP_MSS_DEFAULT;
+
+ return mtu - minlen;
+}
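+
+/* Worked example (illustrative): on an IPv4 path with a route MTU of 1500,
+ * minlen = sizeof(struct iphdr) + sizeof(struct tcphdr) = 20 + 20 = 40, so
+ * the reported MSS is 1500 - 40 = 1460. An MTU too small to carry a single
+ * TCP segment, or larger than 0xffff, falls back to TCP_MSS_DEFAULT.
+ */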
+
+void nft_rt_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dst_entry *dst;
+
+ dst = skb_dst(skb);
+ if (!dst)
+ goto err;
+
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+ *dest = dst->tclassid;
+ break;
+#endif
+ case NFT_RT_NEXTHOP4:
+ if (nft_pf(pkt) != NFPROTO_IPV4)
+ goto err;
+
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
+ break;
+ case NFT_RT_NEXTHOP6:
+ if (nft_pf(pkt) != NFPROTO_IPV6)
+ goto err;
+
+ memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ &ipv6_hdr(skb)->daddr),
+ sizeof(struct in6_addr));
+ break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
+#ifdef CONFIG_XFRM
+ case NFT_RT_XFRM:
+ nft_reg_store8(dest, !!dst->xfrm);
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ return;
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+ [NFTA_RT_DREG] = { .type = NLA_U32 },
+ [NFTA_RT_KEY] = { .type = NLA_U32 },
+};
+
+static int nft_rt_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ if (tb[NFTA_RT_KEY] == NULL ||
+ tb[NFTA_RT_DREG] == NULL)
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY]));
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+#endif
+ case NFT_RT_NEXTHOP4:
+ len = sizeof(u32);
+ break;
+ case NFT_RT_NEXTHOP6:
+ len = sizeof(struct in6_addr);
+ break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
+#ifdef CONFIG_XFRM
+ case NFT_RT_XFRM:
+ len = sizeof(u8);
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+}
+
+static int nft_rt_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ switch (priv->key) {
+ case NFT_RT_NEXTHOP4:
+ case NFT_RT_NEXTHOP6:
+ case NFT_RT_CLASSID:
+ case NFT_RT_XFRM:
+ return 0;
+ case NFT_RT_TCPMSS:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static const struct nft_expr_ops nft_rt_get_ops = {
+ .type = &nft_rt_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
+ .eval = nft_rt_get_eval,
+ .init = nft_rt_get_init,
+ .dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+struct nft_expr_type nft_rt_type __read_mostly = {
+ .name = "rt",
+ .ops = &nft_rt_get_ops,
+ .policy = nft_rt_policy,
+ .maxattr = NFTA_RT_MAX,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
new file mode 100644
index 000000000..1e5e7a181
--- /dev/null
+++ b/net/netfilter/nft_set_bitmap.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_bitmap_elem {
+ struct list_head head;
+ struct nft_set_ext ext;
+};
+
+/* This bitmap uses two bits to represent one element. These two bits determine
+ * the element state in the current and the future generation.
+ *
+ * An element can be in three states. The generation cursor is represented using
+ * the ^ character; note that this cursor shifts on every successful transaction.
+ * If no transaction is going on, we observe that all elements are in one of the
+ * following states:
+ *
+ * 11 = this element is active in the current generation. In case of no updates,
+ * ^ it stays active in the next generation.
+ * 00 = this element is inactive in the current generation. In case of no
+ * ^ updates, it stays inactive in the next generation.
+ *
+ * On transaction handling, we observe these two temporary states:
+ *
+ * 01 = this element is inactive in the current generation and it becomes active
+ *  ^	in the next one. This happens when the element has been inserted but the
+ *	commit path has not been executed yet, so activation is still pending. On
+ *	transaction abortion, the element is removed.
+ * 10 = this element is active in the current generation and it becomes inactive
+ *  ^	in the next one. This happens when the element has been deactivated but
+ *	the commit path has not been executed yet, so removal is still pending. On
+ *	transaction abortion, the next generation bit is reset to restore the
+ *	element's previous state.
+ */
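+
+/* Illustrative example (not from the original source): the generation mask is
+ * a single bit, 0x1 or 0x2, flipping on every commit. Assuming the cursor is
+ * currently on bit 0 (genmask_cur == 0x1, genmask_next == 0x2), inserting an
+ * element at offset @off only sets the next-generation bit:
+ *
+ *	priv->bitmap[idx] |= (0x2 << off);	// 00 -> 01, activation pending
+ *
+ * once the commit flips the cursor, that same bit becomes the current one and
+ * nft_bitmap_active() starts reporting the element as active.
+ */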
+struct nft_bitmap {
+ struct list_head list;
+ u16 bitmap_size;
+ u8 bitmap[];
+};
+
+static inline void nft_bitmap_location(const struct nft_set *set,
+ const void *key,
+ u32 *idx, u32 *off)
+{
+ u32 k;
+
+ if (set->klen == 2)
+ k = *(u16 *)key;
+ else
+ k = *(u8 *)key;
+ k <<= 1;
+
+ *idx = k / BITS_PER_BYTE;
+ *off = k % BITS_PER_BYTE;
+}
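+
+/* Worked example (illustrative): for a one-byte key with value 0x05,
+ * k = 5 << 1 = 10, so idx = 10 / 8 = 1 and off = 10 % 8 = 2; the two bits
+ * describing this element live at bit positions 2 and 3 of bitmap[1].
+ */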
+
+/* Fetch the two bits that represent the element and check if it is active based
+ * on the generation mask.
+ */
+static inline bool
+nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
+{
+ return (bitmap[idx] & (0x3 << off)) & (genmask << off);
+}
+
+INDIRECT_CALLABLE_SCOPE
+bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ u32 idx, off;
+
+ nft_bitmap_location(set, key, &idx, &off);
+
+ return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+}
+
+static struct nft_bitmap_elem *
+nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+ u8 genmask)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext),
+ nft_set_ext_key(&this->ext), set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return NULL;
+}
+
+static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *new = elem->priv, *be;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ be = nft_bitmap_elem_find(set, new, genmask);
+ if (be) {
+ *ext = &be->ext;
+ return -EEXIST;
+ }
+
+ nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off);
+ /* Enter 01 state. */
+ priv->bitmap[idx] |= (genmask << off);
+ list_add_tail_rcu(&new->head, &priv->list);
+
+ return 0;
+}
+
+static void nft_bitmap_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
+ /* Enter 00 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+ list_del_rcu(&be->head);
+}
+
+static void nft_bitmap_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
+ /* Enter 11 state. */
+ priv->bitmap[idx] |= (genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
+}
+
+static bool nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set, void *_be)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ struct nft_bitmap_elem *be = _be;
+ u32 idx, off;
+
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
+ /* Enter 10 state, similar to deactivation. */
+ priv->bitmap[idx] &= ~(genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
+
+ return true;
+}
+
+static void *nft_bitmap_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *this = elem->priv, *be;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(set, elem->key.val.data, &idx, &off);
+
+ be = nft_bitmap_elem_find(set, this, genmask);
+ if (!be)
+ return NULL;
+
+ /* Enter 10 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
+
+ return be;
+}
+
+static void nft_bitmap_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be;
+ struct nft_set_elem elem;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&be->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = be;
+
+		iter->err = iter->fn(ctx, set, iter, &elem);
+		if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+}
+
+/* The bitmap size is pow(2, key length in bits) / bits per byte, multiplied
+ * by two since each element takes two bits. For 8 bit keys, the bitmap
+ * consumes 64 bytes; for 16 bit keys, 16384 bytes.
+ */
+static inline u32 nft_bitmap_size(u32 klen)
+{
+ return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
+}
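+
+/* Worked example (illustrative): for klen == 1 this evaluates to
+ * ((2 << 7) / 8) << 1 = 64 bytes, and for klen == 2 to
+ * ((2 << 15) / 8) << 1 = 16384 bytes; nft_bitmap_total_size() adds
+ * sizeof(struct nft_bitmap) on top.
+ */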
+
+static inline u64 nft_bitmap_total_size(u32 klen)
+{
+ return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
+}
+
+static u64 nft_bitmap_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+
+ return nft_bitmap_total_size(klen);
+}
+
+static int nft_bitmap_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+
+ INIT_LIST_HEAD(&priv->list);
+ priv->bitmap_size = nft_bitmap_size(set->klen);
+
+ return 0;
+}
+
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+ nf_tables_set_elem_destroy(ctx, set, be);
+}
+
+static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+	/* Make sure we don't get bitmaps larger than 16 Kbytes. */
+ if (desc->klen > 2)
+ return false;
+ else if (desc->expr)
+ return false;
+
+ est->size = nft_bitmap_total_size(desc->klen);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+const struct nft_set_type nft_set_bitmap_type = {
+ .ops = {
+ .privsize = nft_bitmap_privsize,
+ .elemsize = offsetof(struct nft_bitmap_elem, ext),
+ .estimate = nft_bitmap_estimate,
+ .init = nft_bitmap_init,
+ .destroy = nft_bitmap_destroy,
+ .insert = nft_bitmap_insert,
+ .remove = nft_bitmap_remove,
+ .deactivate = nft_bitmap_deactivate,
+ .flush = nft_bitmap_flush,
+ .activate = nft_bitmap_activate,
+ .lookup = nft_bitmap_lookup,
+ .walk = nft_bitmap_walk,
+ .get = nft_bitmap_get,
+ },
+};
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 000000000..2013de934
--- /dev/null
+++ b/net/netfilter/nft_set_hash.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_RHASH_ELEMENT_HINT 3
+
+struct nft_rhash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
+struct nft_rhash_elem {
+ struct rhash_head node;
+ struct nft_set_ext ext;
+};
+
+struct nft_rhash_cmp_arg {
+ const struct nft_set *set;
+ const u32 *key;
+ u8 genmask;
+};
+
+static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_rhash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_rhash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_rhash_cmp_arg *x = arg->key;
+ const struct nft_rhash_elem *he = ptr;
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
+
+static const struct rhashtable_params nft_rhash_params = {
+ .head_offset = offsetof(struct nft_rhash_elem, node),
+ .hashfn = nft_rhash_key,
+ .obj_hashfn = nft_rhash_obj,
+ .obj_cmpfn = nft_rhash_cmp,
+ .automatic_shrinking = true,
+};
+
+INDIRECT_CALLABLE_SCOPE
+bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ const struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ *ext = &he->ext;
+
+ return !!he;
+}
+
+static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ return he;
+
+ return ERR_PTR(-ENOENT);
+}
+
+static bool nft_rhash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he, *prev;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ goto out;
+
+ he = new(set, expr, regs);
+ if (he == NULL)
+ goto err1;
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_rhash_params);
+ if (IS_ERR(prev))
+ goto err2;
+
+ /* Another cpu may race to insert the element with the same key */
+ if (prev) {
+ nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
+ he = prev;
+ }
+
+out:
+ *ext = &he->ext;
+ return true;
+
+err2:
+ nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
+err1:
+ return false;
+}
+
+static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he = elem->priv;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+ struct nft_rhash_elem *prev;
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_rhash_params);
+ if (IS_ERR(prev))
+ return PTR_ERR(prev);
+ if (prev) {
+ *ext = &prev->ext;
+ return -EEXIST;
+ }
+ return 0;
+}
+
+static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rhash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+}
+
+static bool nft_rhash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+ struct nft_rhash_elem *he = priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
+}
+
+static void *nft_rhash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ rcu_read_lock();
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ rcu_read_unlock();
+
+ return he;
+}
+
+static void nft_rhash_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he = elem->priv;
+
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
+}
+
+static bool nft_rhash_delete(const struct nft_set *set,
+ const u32 *key)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+ struct nft_rhash_elem *he;
+
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he == NULL)
+ return false;
+
+ nft_set_elem_dead(&he->ext);
+
+ return true;
+}
+
+static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct rhashtable_iter hti;
+ struct nft_set_elem elem;
+
+ rhashtable_walk_enter(&priv->ht, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN) {
+ iter->err = PTR_ERR(he);
+ break;
+ }
+
+ continue;
+ }
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = he;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ break;
+
+cont:
+ iter->count++;
+ }
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+}
+
+static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
+ struct nft_set_ext *ext)
+{
+ struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext);
+ struct nft_expr *expr;
+ u32 size;
+
+ nft_setelem_expr_foreach(expr, elem_expr, size) {
+ if (expr->ops->gc &&
+ expr->ops->gc(read_pnet(&set->net), expr))
+ return true;
+ }
+
+ return false;
+}
+
+static void nft_rhash_gc(struct work_struct *work)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_set *set;
+ struct nft_rhash_elem *he;
+ struct nft_rhash *priv;
+ struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
+
+ priv = container_of(work, struct nft_rhash, gc_work.work);
+ set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
+
+ rhashtable_walk_enter(&priv->ht, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
+ if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
+ nft_rhash_expr_needs_gc_run(set, &he->ext))
+ goto needs_gc_run;
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+needs_gc_run:
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, he);
+ }
+
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+static u64 nft_rhash_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ return sizeof(struct nft_rhash);
+}
+
+static void nft_rhash_gc_init(const struct nft_set *set)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+static int nft_rhash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const tb[])
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_rhash_params;
+ int err;
+
+ params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
+ params.key_len = set->klen;
+
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
+ nft_rhash_gc_init(set);
+
+ return 0;
+}
+
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
+static void nft_rhash_elem_destroy(void *ptr, void *arg)
+{
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
+}
+
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
+ (void *)&rhash_ctx);
+}
+
+/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+#define NFT_MAX_BUCKETS (1U << 31)
+
+static u32 nft_hash_buckets(u32 size)
+{
+ u64 val = div_u64((u64)size * 4, 3);
+
+ if (val >= NFT_MAX_BUCKETS)
+ return NFT_MAX_BUCKETS;
+
+ return roundup_pow_of_two(val);
+}
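+
+/* Worked example (illustrative): a declared set size of 768 gives
+ * val = 768 * 4 / 3 = 1024, already a power of two, so 1024 buckets; a size
+ * of 1000 gives val = 1333, rounded up to 2048 buckets.
+ */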
+
+static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ est->size = ~0;
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+struct nft_hash {
+ u32 seed;
+ u32 buckets;
+ struct hlist_head table[];
+};
+
+struct nft_hash_elem {
+ struct hlist_node node;
+ struct nft_set_ext ext;
+};
+
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ const struct nft_hash_elem *he;
+ u32 hash;
+
+ hash = jhash(key, set->klen, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
+ nft_set_elem_active(&he->ext, genmask)) {
+ *ext = &he->ext;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void *nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_hash_elem *he;
+ u32 hash;
+
+ hash = jhash(elem->key.val.data, set->klen, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+ nft_set_elem_active(&he->ext, genmask))
+ return he;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup_fast(const struct net *net,
+ const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ const struct nft_hash_elem *he;
+ u32 hash, k1, k2;
+
+ k1 = *key;
+ hash = jhash_1word(k1, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
+ if (k1 == k2 &&
+ nft_set_elem_active(&he->ext, genmask)) {
+ *ext = &he->ext;
+ return true;
+ }
+ }
+ return false;
+}
+
+static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
+ const struct nft_set_ext *ext)
+{
+ const struct nft_data *key = nft_set_ext_key(ext);
+ u32 hash, k1;
+
+ if (set->klen == 4) {
+ k1 = *(u32 *)key;
+ hash = jhash_1word(k1, priv->seed);
+ } else {
+ hash = jhash(key, set->klen, priv->seed);
+ }
+ hash = reciprocal_scale(hash, priv->buckets);
+
+ return hash;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_hash_elem *this = elem->priv, *he;
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ u32 hash;
+
+ hash = nft_jhash(set, priv, &this->ext);
+ hlist_for_each_entry(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&this->ext),
+ nft_set_ext_key(&he->ext), set->klen) &&
+ nft_set_elem_active(&he->ext, genmask)) {
+ *ext = &he->ext;
+ return -EEXIST;
+ }
+ }
+ hlist_add_head_rcu(&this->node, &priv->table[hash]);
+ return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+}
+
+static bool nft_hash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+ struct nft_hash_elem *he = priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+ return true;
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *this = elem->priv, *he;
+ u8 genmask = nft_genmask_next(net);
+ u32 hash;
+
+ hash = nft_jhash(set, priv, &this->ext);
+ hlist_for_each_entry(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
+ set->klen) &&
+ nft_set_elem_active(&he->ext, genmask)) {
+ nft_set_elem_change_active(net, set, &he->ext);
+ return he;
+ }
+ }
+ return NULL;
+}
+
+static void nft_hash_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->priv;
+
+ hlist_del_rcu(&he->node);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_set_elem elem;
+ int i;
+
+ for (i = 0; i < priv->buckets; i++) {
+ hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = he;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+ }
+}
+
+static u64 nft_hash_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ return sizeof(struct nft_hash) +
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
+}
+
+static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const tb[])
+{
+ struct nft_hash *priv = nft_set_priv(set);
+
+ priv->buckets = nft_hash_buckets(desc->size);
+ get_random_bytes(&priv->seed, sizeof(priv->seed));
+
+ return 0;
+}
+
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct hlist_node *next;
+ int i;
+
+ for (i = 0; i < priv->buckets; i++) {
+ hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
+ hlist_del_rcu(&he->node);
+ nf_tables_set_elem_destroy(ctx, set, he);
+ }
+ }
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ if (!desc->size)
+ return false;
+
+ if (desc->klen == 4)
+ return false;
+
+ est->size = sizeof(struct nft_hash) +
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ if (!desc->size)
+ return false;
+
+ if (desc->klen != 4)
+ return false;
+
+ est->size = sizeof(struct nft_hash) +
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+const struct nft_set_type nft_set_rhash_type = {
+ .features = NFT_SET_MAP | NFT_SET_OBJECT |
+ NFT_SET_TIMEOUT | NFT_SET_EVAL,
+ .ops = {
+ .privsize = nft_rhash_privsize,
+ .elemsize = offsetof(struct nft_rhash_elem, ext),
+ .estimate = nft_rhash_estimate,
+ .init = nft_rhash_init,
+ .gc_init = nft_rhash_gc_init,
+ .destroy = nft_rhash_destroy,
+ .insert = nft_rhash_insert,
+ .activate = nft_rhash_activate,
+ .deactivate = nft_rhash_deactivate,
+ .flush = nft_rhash_flush,
+ .remove = nft_rhash_remove,
+ .lookup = nft_rhash_lookup,
+ .update = nft_rhash_update,
+ .delete = nft_rhash_delete,
+ .walk = nft_rhash_walk,
+ .get = nft_rhash_get,
+ },
+};
+
+const struct nft_set_type nft_set_hash_type = {
+ .features = NFT_SET_MAP | NFT_SET_OBJECT,
+ .ops = {
+ .privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
+ .estimate = nft_hash_estimate,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
+ .flush = nft_hash_flush,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup,
+ .walk = nft_hash_walk,
+ .get = nft_hash_get,
+ },
+};
+
+const struct nft_set_type nft_set_hash_fast_type = {
+ .features = NFT_SET_MAP | NFT_SET_OBJECT,
+ .ops = {
+ .privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
+ .estimate = nft_hash_fast_estimate,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
+ .flush = nft_hash_flush,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup_fast,
+ .walk = nft_hash_walk,
+ .get = nft_hash_get,
+ },
+};
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
new file mode 100644
index 000000000..4e1cc3172
--- /dev/null
+++ b/net/netfilter/nft_set_pipapo.c
@@ -0,0 +1,2331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* PIPAPO: PIle PAcket POlicies: set for arbitrary concatenations of ranges
+ *
+ * Copyright (c) 2019-2020 Red Hat GmbH
+ *
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+/**
+ * DOC: Theory of Operation
+ *
+ *
+ * Problem
+ * -------
+ *
+ * Match packet bytes against entries composed of ranged or non-ranged packet
+ * field specifiers, mapping them to arbitrary references. For example:
+ *
+ * ::
+ *
+ * --- fields --->
+ * | [net],[port],[net]... => [reference]
+ * entries [net],[port],[net]... => [reference]
+ * | [net],[port],[net]... => [reference]
+ * V ...
+ *
+ * where [net] fields can be IP ranges or netmasks, and [port] fields are port
+ * ranges. Arbitrary packet fields can be matched.
+ *
+ *
+ * Algorithm Overview
+ * ------------------
+ *
+ * This algorithm is loosely inspired by [Ligatti 2010], and fundamentally
+ * relies on the consideration that every contiguous range in a space of b bits
+ * can be converted into b * 2 netmasks, from Theorem 3 in [Rottenstreich 2010],
+ * as also illustrated in Section 9 of [Kogan 2014].
+ *
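+ * As an illustration of the netmask expansion (not from the original text),
+ * the port range 1024-2047 collapses to the single 6-bit prefix
+ * 0000 01** **** ****, while 1024-2048 needs that prefix plus an exact match
+ * on 2048; a range over b = 16 bits expands to at most b * 2 = 32 netmasks.
+ *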
+ * Classification against a number of entries, each requiring a match on given
+ * bits of a packet field, is performed by grouping those bits in sets of
+ * arbitrary size and classifying the packet bits one group at a time.
+ *
+ * Example:
+ * to match the source port (16 bits) of a packet, we can divide those 16 bits
+ * in 4 groups of 4 bits each. Given the entry:
+ * 0000 0001 0101 1001
+ * and a packet with source port:
+ * 0000 0001 1010 1001
+ * first and second groups match, but the third doesn't. We conclude that the
+ * packet doesn't match the given entry.
+ *
+ * Translate the set to a sequence of lookup tables, one per field. Each table
+ * has two dimensions: bit groups to be matched for a single packet field, and
+ * all the possible values of said groups (buckets). Input entries are
+ * represented as one or more rules, depending on the number of composing
+ * netmasks for the given field specifier, and a group match is indicated as a
+ * set bit, with number corresponding to the rule index, in all the buckets
+ * whose value matches the entry for a given group.
+ *
+ * Rules are mapped between fields through an array of x, n pairs, with each
+ * item mapping a matched rule to one or more rules. The position of the pair in
+ * the array indicates the matched rule to be mapped to the next field, x
+ * indicates the first rule index in the next field, and n the number of
+ * next-field rules the current rule maps to.
+ *
+ * The mapping array for the last field maps to the desired references.
+ *
+ * To match, we perform table lookups using the values of grouped packet bits,
+ * and use a sequence of bitwise operations to progressively evaluate rule
+ * matching.
+ *
+ * A stand-alone, reference implementation, also including notes about possible
+ * future optimisations, is available at:
+ * https://pipapo.lameexcu.se/
+ *
+ * Insertion
+ * ---------
+ *
+ * - For each packet field:
+ *
+ * - divide the b packet bits we want to classify into groups of size t,
+ * obtaining ceil(b / t) groups
+ *
+ * Example: match on destination IP address, with t = 4: 32 bits, 8 groups
+ * of 4 bits each
+ *
+ * - allocate a lookup table with one column ("bucket") for each possible
+ * value of a group, and with one row for each group
+ *
+ * Example: 8 groups, 2^4 buckets:
+ *
+ * ::
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0
+ * 1
+ * 2
+ * 3
+ * 4
+ * 5
+ * 6
+ * 7
+ *
+ * - map the bits we want to classify for the current field, for a given
+ * entry, to a single rule for non-ranged and netmask set items, and to one
+ * or multiple rules for ranges. Ranges are expanded to composing netmasks
+ * by pipapo_expand().
+ *
+ * Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048
+ * - rule #0: 10.0.0.5
+ * - rule #1: 192.168.1.0/24
+ * - rule #2: 192.168.2.0/31
+ *
+ * - insert references to the rules in the lookup table, selecting buckets
+ * according to bit values of a rule in the given group. This is done by
+ * pipapo_insert().
+ *
+ * Example: given:
+ * - rule #0: 10.0.0.5 mapping to buckets
+ * < 0 10 0 0 0 0 0 5 >
+ * - rule #1: 192.168.1.0/24 mapping to buckets
+ * < 12 0 10 8 0 1 < 0..15 > < 0..15 > >
+ * - rule #2: 192.168.2.0/31 mapping to buckets
+ * < 12 0 10 8 0 2 0 < 0..1 > >
+ *
+ * these bits are set in the lookup table:
+ *
+ * ::
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0 1,2
+ * 1 1,2 0
+ * 2 0 1,2
+ * 3 0 1,2
+ * 4 0,1,2
+ * 5 0 1 2
+ * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1
+ *
+ * - if this is not the last field in the set, fill a mapping array that maps
+ * rules from the lookup table to rules belonging to the same entry in
+ * the next lookup table, done by pipapo_map().
+ *
+ * Note that as rules map to contiguous ranges of rules, given how netmask
+ * expansion and insertion is performed, &union nft_pipapo_map_bucket stores
+ * this information as pairs of first rule index, rule count.
+ *
+ * Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048,
+ * given lookup table #0 for field 0 (see example above):
+ *
+ * ::
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0 1,2
+ * 1 1,2 0
+ * 2 0 1,2
+ * 3 0 1,2
+ * 4 0,1,2
+ * 5 0 1 2
+ * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1
+ *
+ * and lookup table #1 for field 1 with:
+ * - rule #0: 1024 mapping to buckets
+ * < 0 0 4 0 >
+ * - rule #1: 2048 mapping to buckets
+ * < 0 0 5 0 >
+ *
+ * ::
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0,1
+ * 1 0,1
+ * 2 0 1
+ * 3 0,1
+ *
+ * we need to map rules for 10.0.0.5 in lookup table #0 (rule #0) to 1024
+ * in lookup table #1 (rule #0) and rules for 192.168.1.0-192.168.2.1
+ * (rules #1, #2) to 2048 in lookup table #1 (rule #1):
+ *
+ * ::
+ *
+ * rule indices in current field: 0 1 2
+ * map to rules in next field: 0 1 1
+ *
+ * - if this is the last field in the set, fill a mapping array that maps
+ * rules from the last lookup table to element pointers, also done by
+ * pipapo_map().
+ *
+ * Note that, in this implementation, we have two elements (start, end) for
+ * each entry. The pointer to the end element is stored in this array, and
+ * the pointer to the start element is linked from it.
+ *
+ * Example: entry 10.0.0.5:1024 has a corresponding &struct nft_pipapo_elem
+ * pointer, 0x66, and element for 192.168.1.0-192.168.2.1:2048 is at 0x42.
+ * From the rules of lookup table #1 as mapped above:
+ *
+ * ::
+ *
+ * rule indices in last field: 0 1
+ * map to elements: 0x66 0x42
+ *
+ *
+ * Matching
+ * --------
+ *
+ * We use a result bitmap, with the size of a single lookup table bucket, to
+ * represent the matching state that applies at every algorithm step. This is
+ * done by pipapo_lookup().
+ *
+ * - For each packet field:
+ *
+ * - start with an all-ones result bitmap (res_map in pipapo_lookup())
+ *
+ * - perform a lookup into the table corresponding to the current field,
+ * for each group, and at every group, AND the current result bitmap with
+ * the value from the lookup table bucket
+ *
+ * ::
+ *
+ * Example: 192.168.1.5 < 12 0 10 8 0 1 0 5 >, with lookup table from
+ * insertion examples.
+ * Lookup table buckets are at least 3 bits wide, we'll assume 8 bits for
+ * convenience in this example. Initial result bitmap is 0xff, the steps
+ * below show the value of the result bitmap after each group is processed:
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0 1,2
+ * result bitmap is now: 0xff & 0x6 [bucket 12] = 0x6
+ *
+ * 1 1,2 0
+ * result bitmap is now: 0x6 & 0x6 [bucket 0] = 0x6
+ *
+ * 2 0 1,2
+ * result bitmap is now: 0x6 & 0x6 [bucket 10] = 0x6
+ *
+ * 3 0 1,2
+ * result bitmap is now: 0x6 & 0x6 [bucket 8] = 0x6
+ *
+ * 4 0,1,2
+ * result bitmap is now: 0x6 & 0x7 [bucket 0] = 0x6
+ *
+ * 5 0 1 2
+ * result bitmap is now: 0x6 & 0x2 [bucket 1] = 0x2
+ *
+ * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ * result bitmap is now: 0x2 & 0x7 [bucket 0] = 0x2
+ *
+ * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1
+ * final result bitmap for this field is: 0x2 & 0x3 [bucket 5] = 0x2
+ *
+ * - at the next field, start with a new, all-zeroes result bitmap. For each
+ * bit set in the previous result bitmap, fill the new result bitmap
+ * (fill_map in pipapo_lookup()) with the rule indices from the
+ *    corresponding entries of the mapping table for this field, done by
+ * pipapo_refill()
+ *
+ * Example: with mapping table from insertion examples, with the current
+ * result bitmap from the previous example, 0x02:
+ *
+ * ::
+ *
+ * rule indices in current field: 0 1 2
+ * map to rules in next field: 0 1 1
+ *
+ * the new result bitmap will be 0x02: rule 1 was set, and rule 1 will be
+ * set.
+ *
+ * We can now extend this example to cover the second iteration of the step
+ * above (lookup and AND bitmap): assuming the port field is
+ * 2048 < 0 0 5 0 >, with starting result bitmap 0x2, and lookup table
+ * for "port" field from pre-computation example:
+ *
+ * ::
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0,1
+ * 1 0,1
+ * 2 0 1
+ * 3 0,1
+ *
+ * operations are: 0x2 & 0x3 [bucket 0] & 0x3 [bucket 0] & 0x2 [bucket 5]
+ * & 0x3 [bucket 0], resulting bitmap is 0x2.
+ *
+ * - if this is the last field in the set, look up the value from the mapping
+ * array corresponding to the final result bitmap
+ *
+ * Example: 0x2 resulting bitmap from 192.168.1.5:2048, mapping array for
+ * last field from insertion example:
+ *
+ * ::
+ *
+ * rule indices in last field: 0 1
+ * map to elements: 0x66 0x42
+ *
+ * the matching element is at 0x42.
+ *
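+ * The complete matching sequence can be sketched as follows (simplified
+ * pseudo-code with hypothetical names lt, mt, group_value(), refill() and
+ * element_for(), and single-long bitmaps; nft_pipapo_lookup() below is the
+ * actual implementation):
+ *
+ * ::
+ *
+ *     res = ~0UL;                              // all-ones result bitmap
+ *     for (f = 0; f < field_count; f++) {
+ *             for (g = 0; g < groups[f]; g++)  // AND one bucket per group
+ *                     res &= lt[f][g][group_value(key, f, g)];
+ *
+ *             if (f == field_count - 1)        // last field: get element
+ *                     return element_for(res);
+ *
+ *             res = refill(res, mt[f]);        // map rules to next field
+ *     }
+ *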
+ *
+ * References
+ * ----------
+ *
+ * [Ligatti 2010]
+ * A Packet-classification Algorithm for Arbitrary Bitmask Rules, with
+ * Automatic Time-space Tradeoffs
+ * Jay Ligatti, Josh Kuhn, and Chris Gage.
+ * Proceedings of the IEEE International Conference on Computer
+ * Communication Networks (ICCCN), August 2010.
+ * https://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
+ *
+ * [Rottenstreich 2010]
+ * Worst-Case TCAM Rule Expansion
+ * Ori Rottenstreich and Isaac Keslassy.
+ * 2010 Proceedings IEEE INFOCOM, San Diego, CA, 2010.
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.212.4592&rep=rep1&type=pdf
+ *
+ * [Kogan 2014]
+ * SAX-PAC (Scalable And eXpressive PAcket Classification)
+ * Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane,
+ * and Patrick Eugster.
+ * Proceedings of the 2014 ACM conference on SIGCOMM, August 2014.
+ * https://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <uapi/linux/netfilter/nf_tables.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "nft_set_pipapo_avx2.h"
+#include "nft_set_pipapo.h"
+
+/* Current working bitmap index, toggled between field matches */
+static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
+
+/**
+ * pipapo_refill() - For each set bit, set bits from selected mapping table item
+ * @map: Bitmap to be scanned for set bits
+ * @len: Length of bitmap in longs
+ * @rules: Number of rules in field
+ * @dst: Destination bitmap
+ * @mt: Mapping table containing bit set specifiers
+ * @match_only: Find a single bit and return, don't fill
+ *
+ * Iteration over set bits with __builtin_ctzl(): Daniel Lemire, public domain.
+ *
+ * For each bit set in map, select the bucket from mapping table with index
+ * corresponding to the position of the bit set. Use start bit and amount of
+ * bits specified in bucket to fill region in dst.
+ *
+ * Return: -1 on no match, bit position on 'match_only', 0 otherwise.
+ */
+int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
+ union nft_pipapo_map_bucket *mt, bool match_only)
+{
+ unsigned long bitset;
+ int k, ret = -1;
+
+ for (k = 0; k < len; k++) {
+ bitset = map[k];
+ while (bitset) {
+ unsigned long t = bitset & -bitset;
+ int r = __builtin_ctzl(bitset);
+ int i = k * BITS_PER_LONG + r;
+
+ if (unlikely(i >= rules)) {
+ map[k] = 0;
+ return -1;
+ }
+
+ if (match_only) {
+ bitmap_clear(map, i, 1);
+ return i;
+ }
+
+ ret = 0;
+
+ bitmap_set(dst, mt[i].to, mt[i].n);
+
+ bitset ^= t;
+ }
+ map[k] = 0;
+ }
+
+ return ret;
+}
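+
+/* Worked example for pipapo_refill(), with the mapping from the Theory of
+ * Operation section and hypothetical single-long bitmaps: if res_map holds
+ * 0x2 (rule 1 matched) and mt[1] = { .to = 1, .n = 1 },
+ *
+ *	pipapo_refill(res_map, 1, 3, fill_map, mt, false)
+ *
+ * returns 0 and sets bit 1 in fill_map, that is, the 0x2 bitmap carried to
+ * the next field in the matching example.
+ */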
+
+/**
+ * nft_pipapo_lookup() - Lookup function
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @key: nftables API element representation containing key data
+ * @ext: nftables API extension pointer, filled with matching reference
+ *
+ * For more details, see DOC: Theory of Operation.
+ *
+ * Return: true on match, false otherwise.
+ */
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ unsigned long *res_map, *fill_map;
+ u8 genmask = nft_genmask_cur(net);
+ const u8 *rp = (const u8 *)key;
+ struct nft_pipapo_match *m;
+ struct nft_pipapo_field *f;
+ bool map_index;
+ int i;
+
+ local_bh_disable();
+
+ map_index = raw_cpu_read(nft_pipapo_scratch_index);
+
+ m = rcu_dereference(priv->match);
+
+ if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
+ goto out;
+
+ res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
+ fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+
+ memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+
+ nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+ int b;
+
+ /* For each bit group: select lookup table bucket depending on
+ * packet bytes value, then AND bucket value
+ */
+ if (likely(f->bb == 8))
+ pipapo_and_field_buckets_8bit(f, res_map, rp);
+ else
+ pipapo_and_field_buckets_4bit(f, res_map, rp);
+ NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
+
+ rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
+
+ /* Now populate the bitmap for the next field, unless this is
+ * the last field, in which case return the matched 'ext'
+ * pointer if any.
+ *
+ * Now res_map contains the matching bitmap, and fill_map is the
+ * bitmap for the next field.
+ */
+next_match:
+ b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
+ last);
+ if (b < 0) {
+ raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ local_bh_enable();
+
+ return false;
+ }
+
+ if (last) {
+ *ext = &f->mt[b].e->ext;
+ if (unlikely(nft_set_elem_expired(*ext) ||
+ !nft_set_elem_active(*ext, genmask)))
+ goto next_match;
+
+ /* Last field: we're just returning the key without
+ * filling the initial bitmap for the next field, so the
+ * current inactive bitmap is clean and can be reused as
+ * *next* bitmap (not initial) for the next packet.
+ */
+ raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ local_bh_enable();
+
+ return true;
+ }
+
+ /* Swap bitmap indices: res_map is the initial bitmap for the
+ * next field, and fill_map is guaranteed to be all-zeroes at
+ * this point.
+ */
+ map_index = !map_index;
+ swap(res_map, fill_map);
+
+ rp += NFT_PIPAPO_GROUPS_PADDING(f);
+ }
+
+out:
+ local_bh_enable();
+ return false;
+}
+
+/**
+ * pipapo_get() - Get matching element reference given key data
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @data: Key data to be matched against existing elements
+ * @genmask: If set, check that element is active in given genmask
+ *
+ * This is essentially the same as the lookup function, except that it matches
+ * key data against the uncommitted copy and doesn't use preallocated maps for
+ * bitmap results.
+ *
+ * Return: pointer to &struct nft_pipapo_elem on match, error pointer otherwise.
+ */
+static struct nft_pipapo_elem *pipapo_get(const struct net *net,
+ const struct nft_set *set,
+ const u8 *data, u8 genmask)
+{
+ struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = priv->clone;
+ unsigned long *res_map, *fill_map = NULL;
+ struct nft_pipapo_field *f;
+ int i;
+
+ res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ if (!res_map) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
+ if (!fill_map) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+
+ nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+ int b;
+
+ /* For each bit group: select lookup table bucket depending on
+ * packet bytes value, then AND bucket value
+ */
+ if (f->bb == 8)
+ pipapo_and_field_buckets_8bit(f, res_map, data);
+ else if (f->bb == 4)
+ pipapo_and_field_buckets_4bit(f, res_map, data);
+ else
+ BUG();
+
+ data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
+
+ /* Now populate the bitmap for the next field, unless this is
+ * the last field, in which case return the matched 'ext'
+ * pointer if any.
+ *
+ * Now res_map contains the matching bitmap, and fill_map is the
+ * bitmap for the next field.
+ */
+next_match:
+ b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
+ last);
+ if (b < 0)
+ goto out;
+
+ if (last) {
+ if (nft_set_elem_expired(&f->mt[b].e->ext))
+ goto next_match;
+ if ((genmask &&
+ !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
+ goto next_match;
+
+ ret = f->mt[b].e;
+ goto out;
+ }
+
+ data += NFT_PIPAPO_GROUPS_PADDING(f);
+
+ /* Swap bitmap indices: fill_map will be the initial bitmap for
+ * the next field (i.e. the new res_map), and res_map is
+ * guaranteed to be all-zeroes at this point, ready to be filled
+ * according to the next mapping table.
+ */
+ swap(res_map, fill_map);
+ }
+
+out:
+ kfree(fill_map);
+ kfree(res_map);
+ return ret;
+}
+
+/**
+ * nft_pipapo_get() - Get matching element reference given key data
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ * @flags: Unused
+ */
+static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ return pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net));
+}
+
+/**
+ * pipapo_resize() - Resize lookup or mapping table, or both
+ * @f: Field containing lookup and mapping tables
+ * @old_rules: Previous amount of rules in field
+ * @rules: New amount of rules
+ *
+ * Increase, decrease or maintain table sizes depending on the new amount of
+ * rules, and copy data over. In case the new size is smaller, throw away data
+ * for highest-numbered rules.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure.
+ */
+static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
+{
+ long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
+ union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt;
+ size_t new_bucket_size, copy;
+ int group, bucket;
+
+ new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG);
+#ifdef NFT_PIPAPO_ALIGN
+ new_bucket_size = roundup(new_bucket_size,
+ NFT_PIPAPO_ALIGN / sizeof(*new_lt));
+#endif
+
+ if (new_bucket_size == f->bsize)
+ goto mt;
+
+ if (new_bucket_size > f->bsize)
+ copy = f->bsize;
+ else
+ copy = new_bucket_size;
+
+ new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) *
+ new_bucket_size * sizeof(*new_lt) +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL);
+ if (!new_lt)
+ return -ENOMEM;
+
+ new_p = NFT_PIPAPO_LT_ALIGN(new_lt);
+ old_p = NFT_PIPAPO_LT_ALIGN(old_lt);
+
+ for (group = 0; group < f->groups; group++) {
+ for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS(f->bb); bucket++) {
+ memcpy(new_p, old_p, copy * sizeof(*new_p));
+ new_p += copy;
+ old_p += copy;
+
+ if (new_bucket_size > f->bsize)
+ new_p += new_bucket_size - f->bsize;
+ else
+ old_p += f->bsize - new_bucket_size;
+ }
+ }
+
+mt:
+ new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
+ if (!new_mt) {
+ kvfree(new_lt);
+ return -ENOMEM;
+ }
+
+ memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt));
+ if (rules > old_rules) {
+ memset(new_mt + old_rules, 0,
+ (rules - old_rules) * sizeof(*new_mt));
+ }
+
+ if (new_lt) {
+ f->bsize = new_bucket_size;
+ NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+ kvfree(old_lt);
+ }
+
+ f->mt = new_mt;
+ kvfree(old_mt);
+
+ return 0;
+}
+
+/**
+ * pipapo_bucket_set() - Set rule bit in bucket given group and group value
+ * @f: Field containing lookup table
+ * @rule: Rule index
+ * @group: Group index
+ * @v: Value of bit group
+ */
+static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group,
+ int v)
+{
+ unsigned long *pos;
+
+ pos = NFT_PIPAPO_LT_ALIGN(f->lt);
+ pos += f->bsize * NFT_PIPAPO_BUCKETS(f->bb) * group;
+ pos += f->bsize * v;
+
+ __set_bit(rule, pos);
+}
+
+/**
+ * pipapo_lt_4b_to_8b() - Switch lookup table group width from 4 bits to 8 bits
+ * @old_groups: Number of current groups
+ * @bsize: Size of one bucket, in longs
+ * @old_lt: Pointer to the current lookup table
+ * @new_lt: Pointer to the new, pre-allocated lookup table
+ *
+ * Each bucket with index b in the new lookup table, belonging to group g, is
+ * filled with the bit intersection between:
+ * - bucket with index given by the upper 4 bits of b, from old group g * 2, and
+ * - bucket with index given by the lower 4 bits of b, from old group g * 2 + 1
+ *
+ * That is, given buckets from the new lookup table N(x, y) and the old lookup
+ * table O(x, y), with x bucket index, and y group index:
+ *
+ * N(b, g) := O(b / 16, g * 2) & O(b % 16, g * 2 + 1)
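+ *
+ * For example, N(0x14, 0) = O(0x1, 0) & O(0x4, 1): new bucket 20 of new
+ * group 0 is the intersection of old bucket 1 from old group 0 and old
+ * bucket 4 from old group 1.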
+ *
+ * This ensures equivalence of the matching results on lookup. Two examples in
+ * pictures:
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ... 254 255
+ * 0 ^
+ * 1 | ^
+ * ... ( & ) |
+ * / \ |
+ * / \ .-( & )-.
+ * / bucket \ | |
+ * group 0 / 1 2 3 \ 4 5 6 7 8 9 10 11 12 13 |14 15 |
+ * 0 / \ | |
+ * 1 \ | |
+ * 2 | --'
+ * 3 '-
+ * ...
+ */
+static void pipapo_lt_4b_to_8b(int old_groups, int bsize,
+ unsigned long *old_lt, unsigned long *new_lt)
+{
+ int g, b, i;
+
+ for (g = 0; g < old_groups / 2; g++) {
+ int src_g0 = g * 2, src_g1 = g * 2 + 1;
+
+ for (b = 0; b < NFT_PIPAPO_BUCKETS(8); b++) {
+ int src_b0 = b / NFT_PIPAPO_BUCKETS(4);
+ int src_b1 = b % NFT_PIPAPO_BUCKETS(4);
+ int src_i0 = src_g0 * NFT_PIPAPO_BUCKETS(4) + src_b0;
+ int src_i1 = src_g1 * NFT_PIPAPO_BUCKETS(4) + src_b1;
+
+ for (i = 0; i < bsize; i++) {
+ *new_lt = old_lt[src_i0 * bsize + i] &
+ old_lt[src_i1 * bsize + i];
+ new_lt++;
+ }
+ }
+ }
+}
+
+/**
+ * pipapo_lt_8b_to_4b() - Switch lookup table group width from 8 bits to 4 bits
+ * @old_groups: Number of current groups
+ * @bsize: Size of one bucket, in longs
+ * @old_lt: Pointer to the current lookup table
+ * @new_lt: Pointer to the new, pre-allocated lookup table
+ *
+ * Each bucket with index b in the new lookup table, belonging to group g, is
+ * filled with the bit union of:
+ * - all the buckets of the corresponding old group whose index has the upper
+ *   four bits equal to b, with g even
+ * - all the buckets of the corresponding old group whose index has the lower
+ *   four bits equal to b, with g odd
+ *
+ * That is, given buckets from the new lookup table N(x, y) and the old lookup
+ * table O(x, y), with x bucket index, and y group index, and with group
+ * indices starting from 0 as in the code below:
+ *
+ * - with g even: N(b, g) := U(O(x, g / 2) for each x : (x & 0xf0) >> 4 == b)
+ * - with g odd:  N(b, g) := U(O(x, g / 2) for each x : (x & 0x0f) == b)
+ *
+ * where U() denotes the arbitrary union operation (binary OR of n terms). This
+ * ensures equivalence of the matching results on lookup.
+ */
+static void pipapo_lt_8b_to_4b(int old_groups, int bsize,
+ unsigned long *old_lt, unsigned long *new_lt)
+{
+ int g, b, bsrc, i;
+
+ memset(new_lt, 0, old_groups * 2 * NFT_PIPAPO_BUCKETS(4) * bsize *
+ sizeof(unsigned long));
+
+ for (g = 0; g < old_groups * 2; g += 2) {
+ int src_g = g / 2;
+
+ for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) {
+ for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g;
+ bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1);
+ bsrc++) {
+ if (((bsrc & 0xf0) >> 4) != b)
+ continue;
+
+ for (i = 0; i < bsize; i++)
+ new_lt[i] |= old_lt[bsrc * bsize + i];
+ }
+
+ new_lt += bsize;
+ }
+
+ for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) {
+ for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g;
+ bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1);
+ bsrc++) {
+ if ((bsrc & 0x0f) != b)
+ continue;
+
+ for (i = 0; i < bsize; i++)
+ new_lt[i] |= old_lt[bsrc * bsize + i];
+ }
+
+ new_lt += bsize;
+ }
+ }
+}
+
+/**
+ * pipapo_lt_bits_adjust() - Adjust group size for lookup table if needed
+ * @f: Field containing lookup table
+ */
+static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
+{
+ unsigned long *new_lt;
+ int groups, bb;
+ size_t lt_size;
+
+ lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
+ sizeof(*f->lt);
+
+ if (f->bb == NFT_PIPAPO_GROUP_BITS_SMALL_SET &&
+ lt_size > NFT_PIPAPO_LT_SIZE_HIGH) {
+ groups = f->groups * 2;
+ bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET;
+
+ lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
+ sizeof(*f->lt);
+ } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET &&
+ lt_size < NFT_PIPAPO_LT_SIZE_LOW) {
+ groups = f->groups / 2;
+ bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET;
+
+ lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
+ sizeof(*f->lt);
+
+ /* Don't increase group width if the resulting lookup table size
+ * would exceed the upper size threshold for a "small" set.
+ */
+ if (lt_size > NFT_PIPAPO_LT_SIZE_HIGH)
+ return;
+ } else {
+ return;
+ }
+
+ new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL);
+ if (!new_lt)
+ return;
+
+ NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
+ if (f->bb == 4 && bb == 8) {
+ pipapo_lt_4b_to_8b(f->groups, f->bsize,
+ NFT_PIPAPO_LT_ALIGN(f->lt),
+ NFT_PIPAPO_LT_ALIGN(new_lt));
+ } else if (f->bb == 8 && bb == 4) {
+ pipapo_lt_8b_to_4b(f->groups, f->bsize,
+ NFT_PIPAPO_LT_ALIGN(f->lt),
+ NFT_PIPAPO_LT_ALIGN(new_lt));
+ } else {
+ BUG();
+ }
+
+ f->groups = groups;
+ f->bb = bb;
+ kvfree(f->lt);
+ NFT_PIPAPO_LT_ASSIGN(f, new_lt);
+}
+
+/**
+ * pipapo_insert() - Insert new rule in field given input key and mask length
+ * @f: Field containing lookup table
+ * @k: Input key for classification, without nftables padding
+ * @mask_bits: Length of mask; matches field length for non-ranged entry
+ *
+ * Insert a new rule reference in lookup buckets corresponding to k and
+ * mask_bits.
+ *
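+ * For example (hypothetical 8-bit field with f->bb = 4): inserting k = 0xc3
+ * with mask_bits = 6 sets the rule bit in bucket 12 only for group 0, which
+ * is not masked, and in buckets 0 to 3 for group 1, where the mask limit
+ * falls and only the upper two bits of the group are significant.
+ *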
+ * Return: 1 on success (one rule inserted), negative error code on failure.
+ */
+static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
+ int mask_bits)
+{
+ int rule = f->rules, group, ret, bit_offset = 0;
+
+ ret = pipapo_resize(f, f->rules, f->rules + 1);
+ if (ret)
+ return ret;
+
+ f->rules++;
+
+ for (group = 0; group < f->groups; group++) {
+ int i, v;
+ u8 mask;
+
+ v = k[group / (BITS_PER_BYTE / f->bb)];
+ v &= GENMASK(BITS_PER_BYTE - bit_offset - 1, 0);
+ v >>= (BITS_PER_BYTE - bit_offset) - f->bb;
+
+ bit_offset += f->bb;
+ bit_offset %= BITS_PER_BYTE;
+
+ if (mask_bits >= (group + 1) * f->bb) {
+ /* Not masked */
+ pipapo_bucket_set(f, rule, group, v);
+ } else if (mask_bits <= group * f->bb) {
+ /* Completely masked */
+ for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++)
+ pipapo_bucket_set(f, rule, group, i);
+ } else {
+ /* The mask limit falls on this group */
+ mask = GENMASK(f->bb - 1, 0);
+ mask >>= mask_bits - group * f->bb;
+ for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) {
+ if ((i & ~mask) == (v & ~mask))
+ pipapo_bucket_set(f, rule, group, i);
+ }
+ }
+ }
+
+ pipapo_lt_bits_adjust(f);
+
+ return 1;
+}
+
+/**
+ * pipapo_step_diff() - Check if setting @step bit in netmask would change it
+ * @base: Mask we are expanding
+ * @step: Step bit for given expansion step
+ * @len: Total length of mask space (set and unset bits), bytes
+ *
+ * Convenience function for mask expansion.
+ *
+ * Return: true if step bit changes mask (i.e. isn't set), false otherwise.
+ */
+static bool pipapo_step_diff(u8 *base, int step, int len)
+{
+ /* Network order, byte-addressed */
+#ifdef __BIG_ENDIAN__
+ return !(BIT(step % BITS_PER_BYTE) & base[step / BITS_PER_BYTE]);
+#else
+ return !(BIT(step % BITS_PER_BYTE) &
+ base[len - 1 - step / BITS_PER_BYTE]);
+#endif
+}
+
+/**
+ * pipapo_step_after_end() - Check if mask exceeds range end with given step
+ * @base: Mask we are expanding
+ * @end: End of range
+ * @step: Step bit for given expansion step, highest bit to be set
+ * @len: Total length of mask space (set and unset bits), bytes
+ *
+ * Convenience function for mask expansion.
+ *
+ * Return: true if mask exceeds range setting step bits, false otherwise.
+ */
+static bool pipapo_step_after_end(const u8 *base, const u8 *end, int step,
+ int len)
+{
+ u8 tmp[NFT_PIPAPO_MAX_BYTES];
+ int i;
+
+ memcpy(tmp, base, len);
+
+ /* Network order, byte-addressed */
+ for (i = 0; i <= step; i++)
+#ifdef __BIG_ENDIAN__
+ tmp[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
+#else
+ tmp[len - 1 - i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
+#endif
+
+ return memcmp(tmp, end, len) > 0;
+}
+
+/**
+ * pipapo_base_sum() - Sum step bit to given len-sized netmask base with carry
+ * @base: Netmask base
+ * @step: Step bit to sum
+ * @len: Netmask length, bytes
+ */
+static void pipapo_base_sum(u8 *base, int step, int len)
+{
+ bool carry = false;
+ int i;
+
+ /* Network order, byte-addressed */
+#ifdef __BIG_ENDIAN__
+ for (i = step / BITS_PER_BYTE; i < len; i++) {
+#else
+ for (i = len - 1 - step / BITS_PER_BYTE; i >= 0; i--) {
+#endif
+ if (carry)
+ base[i]++;
+ else
+ base[i] += 1 << (step % BITS_PER_BYTE);
+
+ if (base[i])
+ break;
+
+ carry = true;
+ }
+}
+
+/**
+ * pipapo_expand() - Expand to composing netmasks, insert into lookup table
+ * @f: Field containing lookup table
+ * @start: Start of range
+ * @end: End of range
+ * @len: Length of value in bits
+ *
+ * Expand range to composing netmasks and insert corresponding rule references
+ * in lookup buckets.
+ *
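+ * Example: expanding the 8-bit range 10 - 12 yields two composing masks,
+ * hence two rules: 0000101x, covering 10 - 11, and 00001100, covering 12
+ * alone.
+ *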
+ * Return: number of inserted rules on success, negative error code on failure.
+ */
+static int pipapo_expand(struct nft_pipapo_field *f,
+ const u8 *start, const u8 *end, int len)
+{
+ int step, masks = 0, bytes = DIV_ROUND_UP(len, BITS_PER_BYTE);
+ u8 base[NFT_PIPAPO_MAX_BYTES];
+
+ memcpy(base, start, bytes);
+ while (memcmp(base, end, bytes) <= 0) {
+ int err;
+
+ step = 0;
+ while (pipapo_step_diff(base, step, bytes)) {
+ if (pipapo_step_after_end(base, end, step, bytes))
+ break;
+
+ step++;
+ if (step >= len) {
+ if (!masks) {
+ err = pipapo_insert(f, base, 0);
+ if (err < 0)
+ return err;
+ masks = 1;
+ }
+ goto out;
+ }
+ }
+
+ err = pipapo_insert(f, base, len - step);
+
+ if (err < 0)
+ return err;
+
+ masks++;
+ pipapo_base_sum(base, step, bytes);
+ }
+out:
+ return masks;
+}
+
+/**
+ * pipapo_map() - Insert rules in mapping tables, mapping them between fields
+ * @m: Matching data, including mapping table
+ * @map: Table of rule maps: array of first rule and amount of rules
+ * in next field a given rule maps to, for each field
+ * @e:		For last field, pointer to &struct nft_pipapo_elem rules map to
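+ *
+ * For example, using the two-field insertion example from the Theory of
+ * Operation section: inserting 192.168.1.0-192.168.2.1:2048 yields
+ * map[0] = { .to = 1, .n = 2 } and map[1] = { .to = 1, .n = 1 }, so this
+ * fills mt[1] and mt[2] of field 0 with { .to = 1, .n = 1 }, both pointing
+ * to rule 1 of field 1, and sets mt[1].e of field 1 to @e.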
+ */
+static void pipapo_map(struct nft_pipapo_match *m,
+ union nft_pipapo_map_bucket map[NFT_PIPAPO_MAX_FIELDS],
+ struct nft_pipapo_elem *e)
+{
+ struct nft_pipapo_field *f;
+ int i, j;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) {
+ for (j = 0; j < map[i].n; j++) {
+ f->mt[map[i].to + j].to = map[i + 1].to;
+ f->mt[map[i].to + j].n = map[i + 1].n;
+ }
+ }
+
+ /* Last field: map to ext instead of mapping to next field */
+ for (j = 0; j < map[i].n; j++)
+ f->mt[map[i].to + j].e = e;
+}
+
+/**
+ * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
+ * @clone: Copy of matching data with pending insertions and deletions
+ * @bsize_max: Maximum bucket size, scratch maps cover two buckets
+ *
+ * Return: 0 on success, -ENOMEM on failure.
+ */
+static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
+ unsigned long bsize_max)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ unsigned long *scratch;
+#ifdef NFT_PIPAPO_ALIGN
+ unsigned long *scratch_aligned;
+#endif
+
+ scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL, cpu_to_node(i));
+ if (!scratch) {
+ /* On failure, there's no need to undo previous
+ * allocations: this means that some scratch maps have
+ * a bigger allocated size now (this is only called on
+ * insertion), but the extra space won't be used by any
+ * CPU as new elements are not inserted and m->bsize_max
+ * is not updated.
+ */
+ return -ENOMEM;
+ }
+
+ kfree(*per_cpu_ptr(clone->scratch, i));
+
+ *per_cpu_ptr(clone->scratch, i) = scratch;
+
+#ifdef NFT_PIPAPO_ALIGN
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
+ *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+#endif
+ }
+
+ return 0;
+}
+
+/**
+ * nft_pipapo_insert() - Validate and insert ranged elements
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ * @ext2: Filled with pointer to &struct nft_set_ext in inserted element
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext2)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
+ const u8 *start = (const u8 *)elem->key.val.data, *end;
+ struct nft_pipapo_elem *e = elem->priv, *dup;
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = priv->clone;
+ u8 genmask = nft_genmask_next(net);
+ struct nft_pipapo_field *f;
+ const u8 *start_p, *end_p;
+ int i, bsize_max, err = 0;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+ else
+ end = start;
+
+ dup = pipapo_get(net, set, start, genmask);
+ if (!IS_ERR(dup)) {
+ /* Check if we already have the same exact entry */
+ const struct nft_data *dup_key, *dup_end;
+
+ dup_key = nft_set_ext_key(&dup->ext);
+ if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END))
+ dup_end = nft_set_ext_key_end(&dup->ext);
+ else
+ dup_end = dup_key;
+
+ if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
+ !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+ *ext2 = &dup->ext;
+ return -EEXIST;
+ }
+
+ return -ENOTEMPTY;
+ }
+
+ if (PTR_ERR(dup) == -ENOENT) {
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ }
+
+ if (PTR_ERR(dup) != -ENOENT) {
+ if (IS_ERR(dup))
+ return PTR_ERR(dup);
+ *ext2 = &dup->ext;
+ return -ENOTEMPTY;
+ }
+
+ /* Validate */
+ start_p = start;
+ end_p = end;
+ nft_pipapo_for_each_field(f, i, m) {
+ if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
+ return -ENOSPC;
+
+ if (memcmp(start_p, end_p,
+ f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) > 0)
+ return -EINVAL;
+
+ start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ }
+
+ /* Insert */
+ priv->dirty = true;
+
+ bsize_max = m->bsize_max;
+
+ nft_pipapo_for_each_field(f, i, m) {
+ int ret;
+
+ rulemap[i].to = f->rules;
+
+ ret = memcmp(start, end,
+ f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f));
+ if (!ret)
+ ret = pipapo_insert(f, start, f->groups * f->bb);
+ else
+ ret = pipapo_expand(f, start, end, f->groups * f->bb);
+
+ if (ret < 0)
+ return ret;
+
+ if (f->bsize > bsize_max)
+ bsize_max = f->bsize;
+
+ rulemap[i].n = ret;
+
+ start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ }
+
+ if (!*get_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
+ put_cpu_ptr(m->scratch);
+
+ err = pipapo_realloc_scratch(m, bsize_max);
+ if (err)
+ return err;
+
+ m->bsize_max = bsize_max;
+ } else {
+ put_cpu_ptr(m->scratch);
+ }
+
+ *ext2 = &e->ext;
+
+ pipapo_map(m, rulemap, e);
+
+ return 0;
+}
+
+/**
+ * pipapo_clone() - Clone matching data to create new working copy
+ * @old: Existing matching data
+ *
+ * Return: copy of matching data passed as 'old', error pointer on failure
+ */
+static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
+{
+ struct nft_pipapo_field *dst, *src;
+ struct nft_pipapo_match *new;
+ int i;
+
+ new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
+ GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new->field_count = old->field_count;
+ new->bsize_max = old->bsize_max;
+
+ new->scratch = alloc_percpu(*new->scratch);
+ if (!new->scratch)
+ goto out_scratch;
+
+#ifdef NFT_PIPAPO_ALIGN
+ new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
+ if (!new->scratch_aligned)
+ goto out_scratch;
+#endif
+ for_each_possible_cpu(i)
+ *per_cpu_ptr(new->scratch, i) = NULL;
+
+ if (pipapo_realloc_scratch(new, old->bsize_max))
+ goto out_scratch_realloc;
+
+ rcu_head_init(&new->rcu);
+
+ src = old->f;
+ dst = new->f;
+
+ for (i = 0; i < old->field_count; i++) {
+ unsigned long *new_lt;
+
+ memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));
+
+ new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
+ src->bsize * sizeof(*dst->lt) +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL);
+ if (!new_lt)
+ goto out_lt;
+
+ NFT_PIPAPO_LT_ASSIGN(dst, new_lt);
+
+ memcpy(NFT_PIPAPO_LT_ALIGN(new_lt),
+ NFT_PIPAPO_LT_ALIGN(src->lt),
+ src->bsize * sizeof(*dst->lt) *
+ src->groups * NFT_PIPAPO_BUCKETS(src->bb));
+
+ dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
+ if (!dst->mt)
+ goto out_mt;
+
+ memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
+ src++;
+ dst++;
+ }
+
+ return new;
+
+out_mt:
+ kvfree(dst->lt);
+out_lt:
+ for (dst--; i > 0; i--) {
+ kvfree(dst->mt);
+ kvfree(dst->lt);
+ dst--;
+ }
+out_scratch_realloc:
+ for_each_possible_cpu(i)
+ kfree(*per_cpu_ptr(new->scratch, i));
+#ifdef NFT_PIPAPO_ALIGN
+ free_percpu(new->scratch_aligned);
+#endif
+out_scratch:
+ free_percpu(new->scratch);
+ kfree(new);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * pipapo_rules_same_key() - Get number of rules originating from same entry
+ * @f: Field containing mapping table
+ * @first: Index of first rule in set of rules mapping to same entry
+ *
+ * Using the fact that all rules in a field that originated from the same entry
+ * will map to the same set of rules in the next field, or to the same element
+ * reference, return the cardinality of the set of rules that originated from
+ * the same entry as the rule with index @first, @first rule included.
+ *
+ * In pictures:
+ * rules
+ * field #0 0 1 2 3 4
+ * map to: 0 1 2-4 2-4 5-9
+ * . . ....... . ...
+ * | | | | \ \
+ * | | | | \ \
+ * | | | | \ \
+ * ' ' ' ' ' \
+ * in field #1 0 1 2 3 4 5 ...
+ *
+ * if this is called for rule 2 on field #0, it will return 2, as rules 2 and 3
+ * in field 0 both map to the same set of rules (2, 3, 4) in the next field.
+ *
+ * For the last field in a set, we can rely on associated entries to map to the
+ * same element references.
+ *
+ * Return: Number of rules that originated from the same entry as @first.
+ */
+static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
+{
+ struct nft_pipapo_elem *e = NULL; /* Keep gcc happy */
+ int r;
+
+ for (r = first; r < f->rules; r++) {
+ if (r != first && e != f->mt[r].e)
+ return r - first;
+
+ e = f->mt[r].e;
+ }
+
+ if (r != first)
+ return r - first;
+
+ return 0;
+}
+
+/**
+ * pipapo_unmap() - Remove rules from mapping tables, renumber remaining ones
+ * @mt: Mapping array
+ * @rules: Original amount of rules in mapping table
+ * @start: First rule index to be removed
+ * @n: Amount of rules to be removed
+ * @to_offset: First rule index, in next field, this group of rules maps to
+ * @is_last: If this is the last field, delete reference from mapping array
+ *
+ * This is used to unmap rules from the mapping table for a single field,
+ * maintaining consistency and compactness for the existing ones.
+ *
+ * In pictures: let's assume that we want to delete rules 2 and 3 from the
+ * following mapping array:
+ *
+ * rules
+ * 0 1 2 3 4
+ * map to: 4-10 4-10 11-15 11-15 16-18
+ *
+ * the result will be:
+ *
+ * rules
+ * 0 1 2
+ * map to: 4-10 4-10 11-13
+ *
+ * for fields before the last one. In case this is the mapping table for the
+ * last field in a set, and rules map to pointers to &struct nft_pipapo_elem:
+ *
+ * rules
+ * 0 1 2 3 4
+ * element pointers: 0x42 0x42 0x33 0x33 0x44
+ *
+ * the result will be:
+ *
+ * rules
+ * 0 1 2
+ * element pointers: 0x42 0x42 0x44
+ */
+static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules,
+ int start, int n, int to_offset, bool is_last)
+{
+ int i;
+
+ memmove(mt + start, mt + start + n, (rules - start - n) * sizeof(*mt));
+ memset(mt + rules - n, 0, n * sizeof(*mt));
+
+ if (is_last)
+ return;
+
+ for (i = start; i < rules - n; i++)
+ mt[i].to -= to_offset;
+}
+
+/**
+ * pipapo_drop() - Delete entry from lookup and mapping tables, given rule map
+ * @m: Matching data
+ * @rulemap: Table of rule maps, arrays of first rule and amount of rules
+ * in next field a given entry maps to, for each field
+ *
+ * For each rule in lookup table buckets mapping to this set of rules, drop
+ * all bits set in lookup table mapping. In pictures, assuming we want to drop
+ * rules 0 and 1 from this lookup table:
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0 1,2
+ * 1 1,2 0
+ * 2 0 1,2
+ * 3 0 1,2
+ * 4 0,1,2
+ * 5 0 1 2
+ * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1
+ *
+ * rule 2 becomes rule 0, and the result will be:
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 0
+ * 1 0
+ * 2 0
+ * 3 0
+ * 4 0
+ * 5 0
+ * 6 0
+ * 7 0 0
+ *
+ * once this is done, call unmap() to drop all the corresponding rule references
+ * from mapping tables.
+ */
+static void pipapo_drop(struct nft_pipapo_match *m,
+ union nft_pipapo_map_bucket rulemap[])
+{
+ struct nft_pipapo_field *f;
+ int i;
+
+ nft_pipapo_for_each_field(f, i, m) {
+ int g;
+
+ for (g = 0; g < f->groups; g++) {
+ unsigned long *pos;
+ int b;
+
+ pos = NFT_PIPAPO_LT_ALIGN(f->lt) + g *
+ NFT_PIPAPO_BUCKETS(f->bb) * f->bsize;
+
+ for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) {
+ bitmap_cut(pos, pos, rulemap[i].to,
+ rulemap[i].n,
+ f->bsize * BITS_PER_LONG);
+
+ pos += f->bsize;
+ }
+ }
+
+ pipapo_unmap(f->mt, f->rules, rulemap[i].to, rulemap[i].n,
+ rulemap[i + 1].n, i == m->field_count - 1);
+ if (pipapo_resize(f, f->rules, f->rules - rulemap[i].n)) {
+ /* We can ignore this, a failure to shrink tables down
+ * doesn't make tables invalid.
+ */
+ ;
+ }
+ f->rules -= rulemap[i].n;
+
+ pipapo_lt_bits_adjust(f);
+ }
+}
+
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
+/**
+ * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * @_set: nftables API set representation
+ * @m: Matching data
+ */
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+{
+ struct nft_set *set = (struct nft_set *) _set;
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
+ int rules_f0, first_rule = 0;
+ struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
+ union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
+ struct nft_pipapo_field *f;
+ int i, start, rules_fx;
+
+ start = first_rule;
+ rules_fx = rules_f0;
+
+ nft_pipapo_for_each_field(f, i, m) {
+ rulemap[i].to = start;
+ rulemap[i].n = rules_fx;
+
+ if (i < m->field_count - 1) {
+ rules_fx = f->mt[start].n;
+ start = f->mt[start].to;
+ }
+ }
+
+ /* Pick the last field, and its last index */
+ f--;
+ i--;
+ e = f->mt[rulemap[i].to].e;
+
+		/* Synchronous gc never fails, there is no need to set the
+		 * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
+ priv->dirty = true;
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ return;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
+
+ /* And check again current first rule, which is now the
+ * first we haven't checked.
+ */
+ } else {
+ first_rule += rules_f0;
+ }
+ }
+
+ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
+}
+
+/**
+ * pipapo_free_fields() - Free per-field tables contained in matching data
+ * @m: Matching data
+ */
+static void pipapo_free_fields(struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i;
+
+ nft_pipapo_for_each_field(f, i, m) {
+ kvfree(f->lt);
+ kvfree(f->mt);
+ }
+}
+
+static void pipapo_free_match(struct nft_pipapo_match *m)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ kfree(*per_cpu_ptr(m->scratch, i));
+
+#ifdef NFT_PIPAPO_ALIGN
+ free_percpu(m->scratch_aligned);
+#endif
+ free_percpu(m->scratch);
+
+ pipapo_free_fields(m);
+
+ kfree(m);
+}
+
+/**
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu: RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+ struct nft_pipapo_match *m;
+
+ m = container_of(rcu, struct nft_pipapo_match, rcu);
+ pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
+ * @set: nftables API set representation
+ *
+ * While at it, check if we should perform garbage collection on the working
+ * copy before committing it for lookup, and don't replace the table if the
+ * working copy doesn't have pending changes.
+ *
+ * We also need to create a new working copy for subsequent insertions and
+ * deletions.
+ */
+static void nft_pipapo_commit(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *old;
+
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ pipapo_gc(set, priv->clone);
+
+ if (!priv->dirty)
+ return;
+
+ new_clone = pipapo_clone(priv->clone);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ old = rcu_access_pointer(priv->match);
+ rcu_assign_pointer(priv->match, priv->clone);
+ if (old)
+ call_rcu(&old->rcu, pipapo_reclaim_match);
+
+ priv->clone = new_clone;
+}
+
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *m;
+
+ if (!priv->dirty)
+ return;
+
+ m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
+
+ new_clone = pipapo_clone(m);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ pipapo_free_match(priv->clone);
+ priv->clone = new_clone;
+}
+
+/**
+ * nft_pipapo_activate() - Mark element reference as active given key, commit
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ *
+ * On insertion, elements are added to a copy of the matching data currently
+ * in use for lookups, and not directly inserted into current lookup data. Both
+ * nft_pipapo_insert() and nft_pipapo_activate() are called once for each
+ * element, hence we can't use either one as a real commit operation.
+ */
+static void nft_pipapo_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_pipapo_elem *e = elem->priv;
+
+ nft_set_elem_change_active(net, set, &e->ext);
+}
+
+/**
+ * pipapo_deactivate() - Check that element is in set, mark as inactive
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @data: Input key data
+ * @ext: nftables API extension pointer, used to check for end element
+ *
+ * This is a convenience function that can be called from both
+ * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same
+ * operation.
+ *
+ * Return: deactivated element if found, NULL otherwise.
+ */
+static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const u8 *data, const struct nft_set_ext *ext)
+{
+ struct nft_pipapo_elem *e;
+
+ e = pipapo_get(net, set, data, nft_genmask_next(net));
+ if (IS_ERR(e))
+ return NULL;
+
+ nft_set_elem_change_active(net, set, &e->ext);
+
+ return e;
+}
+
+/**
+ * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ *
+ * Return: deactivated element if found, NULL otherwise.
+ */
+static void *nft_pipapo_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+ return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
+}
+
+/**
+ * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ *
+ * This is functionally the same as nft_pipapo_deactivate(), with a slightly
+ * different interface, and it's also called once for each element in a set
+ * being flushed, so we can't implement, strictly speaking, a flush operation,
+ * which would otherwise be as simple as allocating an empty copy of the
+ * matching data.
+ *
+ * Note that we could in theory do that, mark the set as flushed, and ignore
+ * subsequent calls, but we would leak all the elements after the first one,
+ * because they wouldn't then be freed as a result of API calls.
+ *
+ * Return: true if element was found and deactivated.
+ */
+static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
+ void *elem)
+{
+ struct nft_pipapo_elem *e = elem;
+
+ return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
+ &e->ext);
+}
+
+/**
+ * pipapo_get_boundaries() - Get byte interval for associated rules
+ * @f: Field including lookup table
+ * @first_rule: First rule (lowest index)
+ * @rule_count: Number of associated rules
+ * @left: Byte expression for left boundary (start of range)
+ * @right: Byte expression for right boundary (end of range)
+ *
+ * Given the first rule and amount of rules that originated from the same entry,
+ * build the original range associated with the entry, and calculate the length
+ * of the originating netmask.
+ *
+ * In pictures:
+ *
+ * bucket
+ * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ * 0 1,2
+ * 1 1,2
+ * 2 1,2
+ * 3 1,2
+ * 4 1,2
+ * 5 1 2
+ * 6 1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ * 7 1,2 1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+ *
+ * this is the lookup table corresponding to the IPv4 range
+ * 192.168.1.0-192.168.2.1, which was expanded to the two composing netmasks,
+ * rule #1: 192.168.1.0/24, and rule #2: 192.168.2.0/31.
+ *
+ * This function fills @left and @right with the byte values of the leftmost
+ * and rightmost bucket indices for the lowest and highest rule indices,
+ * respectively. If @first_rule is 1 and @rule_count is 2, we obtain, in
+ * nibbles:
+ * left: < 12, 0, 10, 8, 0, 1, 0, 0 >
+ * right: < 12, 0, 10, 8, 0, 2, 2, 1 >
+ * corresponding to bytes:
+ * left: < 192, 168, 1, 0 >
+ * right: < 192, 168, 2, 1 >
+ * with mask length irrelevant here, unused on return, as the range is already
+ * defined by its start and end points. The mask length is relevant for a single
+ * ranged entry instead: if @first_rule is 1 and @rule_count is 1, we ignore
+ * rule 2 above: @left becomes < 192, 168, 1, 0 >, @right becomes
+ * < 192, 168, 1, 255 >, and the mask length, calculated from the distances
+ * between leftmost and rightmost bucket indices for each group, would be 24.
+ *
+ * Return: mask length, in bits.
+ */
+static int pipapo_get_boundaries(struct nft_pipapo_field *f, int first_rule,
+ int rule_count, u8 *left, u8 *right)
+{
+ int g, mask_len = 0, bit_offset = 0;
+ u8 *l = left, *r = right;
+
+ for (g = 0; g < f->groups; g++) {
+ int b, x0, x1;
+
+ x0 = -1;
+ x1 = -1;
+ for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) {
+ unsigned long *pos;
+
+ pos = NFT_PIPAPO_LT_ALIGN(f->lt) +
+ (g * NFT_PIPAPO_BUCKETS(f->bb) + b) * f->bsize;
+ if (test_bit(first_rule, pos) && x0 == -1)
+ x0 = b;
+ if (test_bit(first_rule + rule_count - 1, pos))
+ x1 = b;
+ }
+
+ *l |= x0 << (BITS_PER_BYTE - f->bb - bit_offset);
+ *r |= x1 << (BITS_PER_BYTE - f->bb - bit_offset);
+
+ bit_offset += f->bb;
+ if (bit_offset >= BITS_PER_BYTE) {
+ bit_offset %= BITS_PER_BYTE;
+ l++;
+ r++;
+ }
+
+ if (x1 - x0 == 0)
+ mask_len += 4;
+ else if (x1 - x0 == 1)
+ mask_len += 3;
+ else if (x1 - x0 == 3)
+ mask_len += 2;
+ else if (x1 - x0 == 7)
+ mask_len += 1;
+ }
+
+ return mask_len;
+}
+
+/**
+ * pipapo_match_field() - Match rules against byte ranges
+ * @f: Field including the lookup table
+ * @first_rule: First of associated rules originating from same entry
+ * @rule_count: Amount of associated rules
+ * @start: Start of range to be matched
+ * @end: End of range to be matched
+ *
+ * Return: true on match, false otherwise.
+ */
+static bool pipapo_match_field(struct nft_pipapo_field *f,
+ int first_rule, int rule_count,
+ const u8 *start, const u8 *end)
+{
+ u8 right[NFT_PIPAPO_MAX_BYTES] = { 0 };
+ u8 left[NFT_PIPAPO_MAX_BYTES] = { 0 };
+
+ pipapo_get_boundaries(f, first_rule, rule_count, left, right);
+
+ return !memcmp(start, left,
+ f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) &&
+ !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f));
+}
+
+/**
+ * nft_pipapo_remove() - Remove element given key, commit
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @elem: nftables API element representation containing key data
+ *
+ * Similarly to nft_pipapo_activate(), this is used as commit operation by the
+ * API, but it's called once per element in the pending transaction, so we can't
+ * implement this as a single commit operation. Closest we can get is to remove
+ * the matched element here, if any, and commit the updated matching data.
+ */
+static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_elem *e = elem->priv;
+ int rules_f0, first_rule = 0;
+ const u8 *data;
+
+ data = (const u8 *)nft_set_ext_key(&e->ext);
+
+ while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
+ union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
+ const u8 *match_start, *match_end;
+ struct nft_pipapo_field *f;
+ int i, start, rules_fx;
+
+ match_start = data;
+
+ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+ else
+ match_end = data;
+
+ start = first_rule;
+ rules_fx = rules_f0;
+
+ nft_pipapo_for_each_field(f, i, m) {
+ if (!pipapo_match_field(f, start, rules_fx,
+ match_start, match_end))
+ break;
+
+ rulemap[i].to = start;
+ rulemap[i].n = rules_fx;
+
+ rules_fx = f->mt[start].n;
+ start = f->mt[start].to;
+
+ match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ }
+
+ if (i == m->field_count) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+ return;
+ }
+
+ first_rule += rules_f0;
+ }
+}
+
+/**
+ * nft_pipapo_walk() - Walk over elements
+ * @ctx: nftables API context
+ * @set: nftables API set representation
+ * @iter: Iterator
+ *
+ * As elements are referenced in the mapping array for the last field, directly
+ * scan that array: there's no need to follow rule mappings from the first
+ * field.
+ */
+static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
+ struct nft_pipapo_match *m;
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ rcu_read_lock();
+ if (iter->genmask == nft_genmask_cur(net))
+ m = rcu_dereference(priv->match);
+ else
+ m = priv->clone;
+
+ if (unlikely(!m))
+ goto out;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+ struct nft_set_elem elem;
+
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ if (iter->count < iter->skip)
+ goto cont;
+
+ e = f->mt[r].e;
+
+ if (!nft_set_elem_active(&e->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = e;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ goto out;
+
+cont:
+ iter->count++;
+ }
+
+out:
+ rcu_read_unlock();
+}
+
+/**
+ * nft_pipapo_privsize() - Return the size of private data for the set
+ * @nla: netlink attributes, ignored as size doesn't depend on them
+ * @desc: Set description, ignored as size doesn't depend on it
+ *
+ * Return: size of private data for this set implementation, in bytes
+ */
+static u64 nft_pipapo_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ return sizeof(struct nft_pipapo);
+}
+
+/**
+ * nft_pipapo_estimate() - Set size, space and lookup complexity
+ * @desc: Set description, element count and field description used
+ * @features: Flags: NFT_SET_INTERVAL needs to be there
+ * @est: Storage for estimation data
+ *
+ * Return: true if set description is compatible, false otherwise
+ */
+static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ if (!(features & NFT_SET_INTERVAL) ||
+ desc->field_count < NFT_PIPAPO_MIN_FIELDS)
+ return false;
+
+ est->size = pipapo_estimate_size(desc);
+ if (!est->size)
+ return false;
+
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
+
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+/**
+ * nft_pipapo_init() - Initialise data for a set instance
+ * @set: nftables API set representation
+ * @desc: Set description
+ * @nla: netlink attributes
+ *
+ * Validate number and size of fields passed as NFTA_SET_DESC_CONCAT netlink
+ * attributes, initialise internal set parameters, current instance of matching
+ * data and a copy for subsequent insertions.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int nft_pipapo_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+ struct nft_pipapo_field *f;
+ int err, i, field_count;
+
+ field_count = desc->field_count ? : 1;
+
+ if (field_count > NFT_PIPAPO_MAX_FIELDS)
+ return -EINVAL;
+
+ m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
+ GFP_KERNEL);
+ if (!m)
+ return -ENOMEM;
+
+ m->field_count = field_count;
+ m->bsize_max = 0;
+
+ m->scratch = alloc_percpu(unsigned long *);
+ if (!m->scratch) {
+ err = -ENOMEM;
+ goto out_scratch;
+ }
+ for_each_possible_cpu(i)
+ *per_cpu_ptr(m->scratch, i) = NULL;
+
+#ifdef NFT_PIPAPO_ALIGN
+ m->scratch_aligned = alloc_percpu(unsigned long *);
+ if (!m->scratch_aligned) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ for_each_possible_cpu(i)
+ *per_cpu_ptr(m->scratch_aligned, i) = NULL;
+#endif
+
+ rcu_head_init(&m->rcu);
+
+ nft_pipapo_for_each_field(f, i, m) {
+ int len = desc->field_len[i] ? : set->klen;
+
+ f->bb = NFT_PIPAPO_GROUP_BITS_INIT;
+ f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f);
+
+ priv->width += round_up(len, sizeof(u32));
+
+ f->bsize = 0;
+ f->rules = 0;
+ NFT_PIPAPO_LT_ASSIGN(f, NULL);
+ f->mt = NULL;
+ }
+
+ /* Create an initial clone of matching data for next insertion */
+ priv->clone = pipapo_clone(m);
+ if (IS_ERR(priv->clone)) {
+ err = PTR_ERR(priv->clone);
+ goto out_free;
+ }
+
+ priv->dirty = false;
+
+ rcu_assign_pointer(priv->match, m);
+
+ return 0;
+
+out_free:
+#ifdef NFT_PIPAPO_ALIGN
+ free_percpu(m->scratch_aligned);
+#endif
+ free_percpu(m->scratch);
+out_scratch:
+ kfree(m);
+
+ return err;
+}
+
+/**
+ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @ctx: context
+ * @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
+ */
+static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i, r;
+
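+	/* Walk to the last field: only its mapping table points to set elements */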
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+
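+		/* Adjacent rules may share an element: destroy each element once */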
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ e = f->mt[r].e;
+
+ nf_tables_set_elem_destroy(ctx, set, e);
+ }
+}
+
+/**
+ * nft_pipapo_destroy() - Free private data for set and all committed elements
+ * @ctx: context
+ * @set: nftables API set representation
+ */
+static void nft_pipapo_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+ int cpu;
+
+ m = rcu_dereference_protected(priv->match, true);
+ if (m) {
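+		/* Make sure pending RCU callbacks on old matching data are done */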
+ rcu_barrier();
+
+ nft_set_pipapo_match_destroy(ctx, set, m);
+
+#ifdef NFT_PIPAPO_ALIGN
+ free_percpu(m->scratch_aligned);
+#endif
+ for_each_possible_cpu(cpu)
+ kfree(*per_cpu_ptr(m->scratch, cpu));
+ free_percpu(m->scratch);
+ pipapo_free_fields(m);
+ kfree(m);
+ priv->match = NULL;
+ }
+
+ if (priv->clone) {
+ m = priv->clone;
+
+ if (priv->dirty)
+ nft_set_pipapo_match_destroy(ctx, set, m);
+
+#ifdef NFT_PIPAPO_ALIGN
+ free_percpu(priv->clone->scratch_aligned);
+#endif
+ for_each_possible_cpu(cpu)
+ kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ free_percpu(priv->clone->scratch);
+
+ pipapo_free_fields(priv->clone);
+ kfree(priv->clone);
+ priv->clone = NULL;
+ }
+}
+
+/**
+ * nft_pipapo_gc_init() - Initialise garbage collection
+ * @set: nftables API set representation
+ *
+ * Instead of actually setting up a periodic work for garbage collection, as
+ * this operation requires a swap of matching data with the working copy, we'll
+ * do that opportunistically with other commit operations if the interval has
+ * elapsed, so we just need to set the current jiffies timestamp here.
+ */
+static void nft_pipapo_gc_init(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+
+ priv->last_gc = jiffies;
+}
+
+const struct nft_set_type nft_set_pipapo_type = {
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
+ NFT_SET_TIMEOUT,
+ .ops = {
+ .lookup = nft_pipapo_lookup,
+ .insert = nft_pipapo_insert,
+ .activate = nft_pipapo_activate,
+ .deactivate = nft_pipapo_deactivate,
+ .flush = nft_pipapo_flush,
+ .remove = nft_pipapo_remove,
+ .walk = nft_pipapo_walk,
+ .get = nft_pipapo_get,
+ .privsize = nft_pipapo_privsize,
+ .estimate = nft_pipapo_estimate,
+ .init = nft_pipapo_init,
+ .destroy = nft_pipapo_destroy,
+ .gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
+ .elemsize = offsetof(struct nft_pipapo_elem, ext),
+ },
+};
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+const struct nft_set_type nft_set_pipapo_avx2_type = {
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
+ NFT_SET_TIMEOUT,
+ .ops = {
+ .lookup = nft_pipapo_avx2_lookup,
+ .insert = nft_pipapo_insert,
+ .activate = nft_pipapo_activate,
+ .deactivate = nft_pipapo_deactivate,
+ .flush = nft_pipapo_flush,
+ .remove = nft_pipapo_remove,
+ .walk = nft_pipapo_walk,
+ .get = nft_pipapo_get,
+ .privsize = nft_pipapo_privsize,
+ .estimate = nft_pipapo_avx2_estimate,
+ .init = nft_pipapo_init,
+ .destroy = nft_pipapo_destroy,
+ .gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
+ .elemsize = offsetof(struct nft_pipapo_elem, ext),
+ },
+};
+#endif
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
new file mode 100644
index 000000000..25a755915
--- /dev/null
+++ b/net/netfilter/nft_set_pipapo.h
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef _NFT_SET_PIPAPO_H
+#define _NFT_SET_PIPAPO_H
+
+#include <linux/log2.h>
+#include <net/ipv6.h> /* For the maximum length of a field */
+
+/* Count of concatenated fields depends on count of 32-bit nftables registers */
+#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
+
+/* Restrict usage to multiple fields, make sure rbtree is used otherwise */
+#define NFT_PIPAPO_MIN_FIELDS 2
+
+/* Largest supported field size */
+#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
+#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
+
+/* Bits to be grouped together in table buckets depending on set size */
+#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET
+#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8
+#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4
+#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \
+ BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \
+ (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
+#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb)
+
+/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
+ * small group width, and switch to the big group width if the table gets
+ * smaller than NFT_PIPAPO_LT_SIZE_LOW.
+ *
+ * Picking 2MiB as threshold (for a single table) avoids as much as possible
+ * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
+ * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
+ * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
+ */
+#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21)
+#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16)
+#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD
+#define NFT_PIPAPO_LT_SIZE_LOW (NFT_PIPAPO_LT_SIZE_THRESHOLD - \
+ NFT_PIPAPO_LT_SIZE_HYSTERESIS)
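+
+/* e.g. a table switches to 4-bit groups once it grows past 2MiB (1 << 21
+ * bytes), and back to 8-bit groups only after shrinking below 2MiB - 64KiB
+ */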
+
+/* Fields are padded to 32 bits in input registers */
+#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \
+ (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
+#define NFT_PIPAPO_GROUPS_PADDING(f) \
+ (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \
+ NFT_PIPAPO_GROUPS_PER_BYTE(f))
+
+/* Number of buckets given by 2 ^ n, with n bucket bits */
+#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb))
+
+/* Each n-bit range maps to up to n * 2 rules */
+#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
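+
+/* e.g. with NFT_PIPAPO_MAX_BITS at 128, a range can expand to at most 256
+ * rules, and const_ilog2(256) = 8 bits are enough to store that count
+ */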
+
+/* Use the rest of mapping table buckets for rule indices, but it makes no sense
+ * to exceed 32 bits
+ */
+#if BITS_PER_LONG == 64
+#define NFT_PIPAPO_MAP_TOBITS 32
+#else
+#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
+#endif
+
+/* ...which gives us the highest allowed index for a rule */
+#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
+ - (1UL << NFT_PIPAPO_MAP_NBITS))
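+
+/* e.g. (1UL << 31) - (1UL << 8) on 64-bit, with the values defined above */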
+
+/* Definitions for vectorised implementations */
+#ifdef NFT_PIPAPO_ALIGN
+#define NFT_PIPAPO_ALIGN_HEADROOM \
+ (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
+#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
+#define NFT_PIPAPO_LT_ASSIGN(field, x) \
+ do { \
+ (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
+ (field)->lt = (x); \
+ } while (0)
+#else
+#define NFT_PIPAPO_ALIGN_HEADROOM 0
+#define NFT_PIPAPO_LT_ALIGN(lt) (lt)
+#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
+#endif /* NFT_PIPAPO_ALIGN */
+
+#define nft_pipapo_for_each_field(field, index, match) \
+ for ((field) = (match)->f, (index) = 0; \
+ (index) < (match)->field_count; \
+ (index)++, (field)++)
+
+/**
+ * union nft_pipapo_map_bucket - Bucket of mapping table
+ * @to: First rule number (in next field) this rule maps to
+ * @n: Number of rules (in next field) this rule maps to
+ * @e: If there's no next field, pointer to element this rule maps to
+ */
+union nft_pipapo_map_bucket {
+ struct {
+#if BITS_PER_LONG == 64
+ static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
+ u32 to;
+
+ static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
+ u32 n;
+#else
+ unsigned long to:NFT_PIPAPO_MAP_TOBITS;
+ unsigned long n:NFT_PIPAPO_MAP_NBITS;
+#endif
+ };
+ struct nft_pipapo_elem *e;
+};
+
+/**
+ * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
+ * @groups: Amount of bit groups
+ * @rules: Number of inserted rules
+ * @bsize: Size of each bucket in lookup table, in longs
+ * @bb: Number of bits grouped together in lookup table buckets
+ * @lt: Lookup table: 'groups' rows of buckets
+ * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
+ * @mt: Mapping table: one bucket per rule
+ */
+struct nft_pipapo_field {
+ int groups;
+ unsigned long rules;
+ size_t bsize;
+ int bb;
+#ifdef NFT_PIPAPO_ALIGN
+ unsigned long *lt_aligned;
+#endif
+ unsigned long *lt;
+ union nft_pipapo_map_bucket *mt;
+};
+
+/**
+ * struct nft_pipapo_match - Data used for lookup and matching
+ * @field_count: Amount of fields in set
+ * @scratch: Preallocated per-CPU maps for partial matching results
+ * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
+ * @bsize_max: Maximum lookup table bucket size of all fields, in longs
+ * @rcu: Matching data is swapped on commits
+ * @f: Fields, with lookup and mapping tables
+ */
+struct nft_pipapo_match {
+ int field_count;
+#ifdef NFT_PIPAPO_ALIGN
+ unsigned long * __percpu *scratch_aligned;
+#endif
+ unsigned long * __percpu *scratch;
+ size_t bsize_max;
+ struct rcu_head rcu;
+ struct nft_pipapo_field f[];
+};
+
+/**
+ * struct nft_pipapo - Representation of a set
+ * @match: Currently in-use matching data
+ * @clone: Copy where pending insertions and deletions are kept
+ * @width: Total bytes to be matched for one packet, including padding
+ * @dirty: Working copy has pending insertions or deletions
+ * @last_gc: Timestamp of last garbage collection run, jiffies
+ */
+struct nft_pipapo {
+ struct nft_pipapo_match __rcu *match;
+ struct nft_pipapo_match *clone;
+ int width;
+ bool dirty;
+ unsigned long last_gc;
+};
+
+struct nft_pipapo_elem;
+
+/**
+ * struct nft_pipapo_elem - API-facing representation of single set element
+ * @ext: nftables API extensions
+ */
+struct nft_pipapo_elem {
+ struct nft_set_ext ext;
+};
+
+int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
+ union nft_pipapo_map_bucket *mt, bool match_only);
+
+/**
+ * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
+ * @f: Field including lookup table
+ * @dst: Area to store result
+ * @data: Input data selecting table buckets
+ */
+static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
+ unsigned long *dst,
+ const u8 *data)
+{
+ unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
+ int group;
+
+ for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
+ u8 v;
+
+ v = *data >> 4;
+ __bitmap_and(dst, dst, lt + v * f->bsize,
+ f->bsize * BITS_PER_LONG);
+ lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
+
+ v = *data & 0x0f;
+ __bitmap_and(dst, dst, lt + v * f->bsize,
+ f->bsize * BITS_PER_LONG);
+ lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
+ }
+}
+
+/**
+ * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
+ * @f: Field including lookup table
+ * @dst: Area to store result
+ * @data: Input data selecting table buckets
+ */
+static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
+ unsigned long *dst,
+ const u8 *data)
+{
+ unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
+ int group;
+
+ for (group = 0; group < f->groups; group++, data++) {
+ __bitmap_and(dst, dst, lt + *data * f->bsize,
+ f->bsize * BITS_PER_LONG);
+ lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
+ }
+}
+
+/**
+ * pipapo_estimate_size() - Estimate worst-case for set size
+ * @desc: Set description, element count and field description used here
+ *
+ * The size for this set type can vary dramatically, as it depends on the number
+ * of rules (composing netmasks) the entries expand to. We compute the worst
+ * case here.
+ *
+ * In general, for a non-ranged entry or a single composing netmask, we need
+ * one bit in each lookup table bucket for every bit group (sixteen buckets
+ * per 4-bit group, 256 per 8-bit group), plus a bucket in the mapping table
+ * for each field.
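+ *
+ * For example, the code below sizes a 4-byte field at up to
+ * ilog2(32) * 2 = 10 rules per entry, that is, 10 * 256 / 8 = 320 bytes of
+ * lookup buckets plus 10 mapping table buckets, with the initial 8-bit
+ * group width.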
+ *
+ * Return: worst-case set size in bytes, 0 on any overflow
+ */
+static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
+{
+ unsigned long entry_size;
+ u64 size;
+ int i;
+
+ for (i = 0, entry_size = 0; i < desc->field_count; i++) {
+ unsigned long rules;
+
+ if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
+ return 0;
+
+ /* Worst-case ranges for each concatenated field: each n-bit
+ * field can expand to up to n * 2 rules in each bucket, and
+ * each rule also needs a mapping bucket.
+ */
+ rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
+ entry_size += rules *
+ NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
+ BITS_PER_BYTE;
+ entry_size += rules * sizeof(union nft_pipapo_map_bucket);
+ }
+
+ /* Rules in lookup and mapping tables are needed for each entry */
+ size = desc->size * entry_size;
+ if (size && div_u64(size, desc->size) != entry_size)
+ return 0;
+
+ size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
+
+ size += sizeof(struct nft_pipapo_field) * desc->field_count;
+
+ return size;
+}
+
+#endif /* _NFT_SET_PIPAPO_H */
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
new file mode 100644
index 000000000..52e0d026d
--- /dev/null
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -0,0 +1,1228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines
+ *
+ * Copyright (c) 2019-2020 Red Hat GmbH
+ *
+ * Author: Stefano Brivio <sbrivio@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <uapi/linux/netfilter/nf_tables.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include <linux/compiler.h>
+#include <asm/fpu/api.h>
+
+#include "nft_set_pipapo_avx2.h"
+#include "nft_set_pipapo.h"
+
+#define NFT_PIPAPO_LONGS_PER_M256 (XSAVE_YMM_SIZE / BITS_PER_LONG)
+
+/* Load from memory into YMM register with non-temporal hint ("stream load"),
+ * that is, don't fetch lines from memory into the cache. This avoids pushing
+ * precious packet data out of the cache hierarchy, and is appropriate when:
+ *
+ * - loading buckets from lookup tables, as they are not going to be used
+ * again before packets are entirely classified
+ *
+ * - loading the result bitmap from the previous field, as it's never used
+ * again
+ */
+#define NFT_PIPAPO_AVX2_LOAD(reg, loc) \
+ asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc))
+
+/* Stream a single lookup table bucket into YMM register given lookup table,
+ * group index, value of packet bits, bucket size.
+ */
+#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize) \
+ NFT_PIPAPO_AVX2_LOAD(reg, \
+ lt[((group) * NFT_PIPAPO_BUCKETS(4) + \
+ (v)) * (bsize)])
+#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize) \
+ NFT_PIPAPO_AVX2_LOAD(reg, \
+ lt[((group) * NFT_PIPAPO_BUCKETS(8) + \
+ (v)) * (bsize)])
+
+/* Bitwise AND: the staple operation of this algorithm */
+#define NFT_PIPAPO_AVX2_AND(dst, a, b) \
+ asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst)
+
+/* Jump to label if @reg is zero */
+#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
+ asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ "je %l[" #label "]" : : : : label)
+
+/* Store 256 bits from YMM register into memory. Contrary to bucket load
+ * operation, we don't bypass the cache here, as stored matching results
+ * are always used shortly after.
+ */
+#define NFT_PIPAPO_AVX2_STORE(loc, reg) \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc))
+
+/* Zero out a complete YMM register, @reg */
+#define NFT_PIPAPO_AVX2_ZERO(reg) \
+ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
+
+/* Current working bitmap index, toggled between field matches */
+static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
+
+/**
+ * nft_pipapo_avx2_prepare() - Prepare before main algorithm body
+ *
+ * This zeroes out ymm15, which is later used whenever we need to clear a
+ * memory location, by storing its content into memory.
+ */
+static void nft_pipapo_avx2_prepare(void)
+{
+ NFT_PIPAPO_AVX2_ZERO(15);
+}
+
+/**
+ * nft_pipapo_avx2_fill() - Fill a bitmap region with ones
+ * @data: Base memory area
+ * @start: First bit to set
+ * @len: Count of bits to fill
+ *
+ * This is essentially a version of bitmap_set(), as used e.g. by
+ * pipapo_refill(), tailored to the microarchitectures using it and to the
+ * specific usage pattern: it's very likely that we'll set a small number of
+ * bits, not crossing a word boundary, and correct branch prediction is
+ * critical here.
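+ *
+ * A call with @start 3 and @len 2, for instance, takes the GENMASK() path
+ * below and just sets bits 3 and 4 of the first word.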
+ *
+ * This function doesn't actually use any AVX2 instruction.
+ */
+static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
+{
+ int offset = start % BITS_PER_LONG;
+ unsigned long mask;
+
+ data += start / BITS_PER_LONG;
+
+ if (likely(len == 1)) {
+ *data |= BIT(offset);
+ return;
+ }
+
+ if (likely(len < BITS_PER_LONG || offset)) {
+ if (likely(len + offset <= BITS_PER_LONG)) {
+ *data |= GENMASK(len - 1 + offset, offset);
+ return;
+ }
+
+ *data |= ~0UL << offset;
+ len -= BITS_PER_LONG - offset;
+ data++;
+
+ if (len <= BITS_PER_LONG) {
+ mask = ~0UL >> (BITS_PER_LONG - len);
+ *data |= mask;
+ return;
+ }
+ }
+
+ memset(data, 0xff, len / BITS_PER_BYTE);
+ data += len / BITS_PER_LONG;
+
+ len %= BITS_PER_LONG;
+ if (len)
+ *data |= ~0UL >> (BITS_PER_LONG - len);
+}
+
+/**
+ * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits
+ * @offset: Start from given bitmap (equivalent to bucket) offset, in longs
+ * @map: Bitmap to be scanned for set bits
+ * @dst: Destination bitmap
+ * @mt: Mapping table containing bit set specifiers
+ * @last: Return index of first set bit, if this is the last field
+ *
+ * This is an alternative implementation of pipapo_refill() suitable for usage
+ * with AVX2 lookup routines: we know there are four words to be scanned, at
+ * a given offset inside the map, for each matching iteration.
+ *
+ * This function doesn't actually use any AVX2 instruction.
+ *
+ * Return: first set bit index if @last, index of first filled word otherwise.
+ */
+static int nft_pipapo_avx2_refill(int offset, unsigned long *map,
+ unsigned long *dst,
+ union nft_pipapo_map_bucket *mt, bool last)
+{
+ int ret = -1;
+
+#define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \
+ do { \
+ while (map[(x)]) { \
+ int r = __builtin_ctzl(map[(x)]); \
+ int i = (offset + (x)) * BITS_PER_LONG + r; \
+ \
+ if (last) \
+ return i; \
+ \
+ nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \
+ \
+ if (ret == -1) \
+ ret = mt[i].to; \
+ \
+ map[(x)] &= ~(1UL << r); \
+ } \
+ } while (0)
+
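+	/* A 256-bit match result spans four longs on x86_64: scan each of them */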
+ NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0);
+ NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1);
+ NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2);
+ NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3);
+#undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * Load buckets from lookup table corresponding to the values of each 4-bit
+ * group of packet bytes, and perform a bitwise intersection between them. If
+ * this is the first field in the set, simply AND the buckets together
+ * (equivalent to using an all-ones starting bitmap), use the provided starting
+ * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next
+ * working bitmap, @fill.
+ *
+ * This is used for 8-bit fields (e.g. protocol numbers).
+ *
+ * Out-of-order (and superscalar) execution is vital here, so it's critical to
+ * avoid false data dependencies. CPU and compiler could (mostly) take care of
+ * this on their own, but the operation ordering is explicitly given here with
+ * a likely execution order in mind, to highlight possible stalls. That's why
+ * a number of logically distinct operations (i.e. loading buckets, intersecting
+ * buckets) are interleaved.
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf };
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_LOAD(2, map[i_ul]);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing);
+ NFT_PIPAPO_AVX2_AND(3, 0, 1);
+ NFT_PIPAPO_AVX2_AND(4, 2, 3);
+ }
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 16-bit fields (e.g. ports).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf };
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ NFT_PIPAPO_AVX2_AND(5, 2, 3);
+ NFT_PIPAPO_AVX2_AND(7, 4, 5);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+
+ NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
+
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_AND(7, 4, 5);
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(7, 6, 7);
+ }
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 7);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 32-bit fields (e.g. IPv4 addresses).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
+ pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
+ };
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 4, pg[4], bsize);
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 5, pg[5], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 6, pg[6], bsize);
+ NFT_PIPAPO_AVX2_AND(8, 2, 3);
+ NFT_PIPAPO_AVX2_AND(9, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize);
+ NFT_PIPAPO_AVX2_AND(11, 6, 7);
+ NFT_PIPAPO_AVX2_AND(12, 8, 9);
+ NFT_PIPAPO_AVX2_AND(13, 10, 11);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(1, 12, 13);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
+
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
+ NFT_PIPAPO_AVX2_AND(8, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize);
+ NFT_PIPAPO_AVX2_AND(10, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize);
+ NFT_PIPAPO_AVX2_AND(12, 6, 7);
+ NFT_PIPAPO_AVX2_AND(13, 8, 9);
+ NFT_PIPAPO_AVX2_AND(14, 10, 11);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(1, 12, 13);
+ NFT_PIPAPO_AVX2_AND(1, 1, 14);
+ }
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 1);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 48-bit fields (e.g. MAC addresses/EUI-48).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
+ pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
+ pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf,
+ };
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (!first)
+ NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
+
+ if (!first) {
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
+ NFT_PIPAPO_AVX2_AND(1, 1, 0);
+ }
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 4, pg[4], bsize);
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 6, pg[6], bsize);
+ NFT_PIPAPO_AVX2_AND(9, 1, 4);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize);
+ NFT_PIPAPO_AVX2_AND(11, 5, 6);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 8, pg[8], bsize);
+ NFT_PIPAPO_AVX2_AND(13, 7, 8);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 9, pg[9], bsize);
+
+ NFT_PIPAPO_AVX2_AND(0, 9, 10);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 10, pg[10], bsize);
+ NFT_PIPAPO_AVX2_AND(2, 11, 12);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 13, 14);
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+
+ /* Stalls */
+ NFT_PIPAPO_AVX2_AND(7, 4, 5);
+ NFT_PIPAPO_AVX2_AND(8, 6, 7);
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 8);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 128-bit fields (e.g. IPv6 addresses).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
+ pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
+ pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf,
+ pkt[6] >> 4, pkt[6] & 0xf, pkt[7] >> 4, pkt[7] & 0xf,
+ pkt[8] >> 4, pkt[8] & 0xf, pkt[9] >> 4, pkt[9] & 0xf,
+ pkt[10] >> 4, pkt[10] & 0xf, pkt[11] >> 4, pkt[11] & 0xf,
+ pkt[12] >> 4, pkt[12] & 0xf, pkt[13] >> 4, pkt[13] & 0xf,
+ pkt[14] >> 4, pkt[14] & 0xf, pkt[15] >> 4, pkt[15] & 0xf,
+ };
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (!first)
+ NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
+ if (!first) {
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
+ NFT_PIPAPO_AVX2_AND(1, 1, 0);
+ }
+
+ NFT_PIPAPO_AVX2_AND(5, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
+ NFT_PIPAPO_AVX2_AND(8, 1, 4);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize);
+ NFT_PIPAPO_AVX2_AND(10, 5, 6);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize);
+ NFT_PIPAPO_AVX2_AND(12, 7, 8);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt, 8, pg[8], bsize);
+ NFT_PIPAPO_AVX2_AND(14, 9, 10);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 9, pg[9], bsize);
+ NFT_PIPAPO_AVX2_AND(1, 11, 12);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 10, pg[10], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 13, 14);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 12, pg[12], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 13, pg[13], bsize);
+ NFT_PIPAPO_AVX2_AND(7, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 14, pg[14], bsize);
+ NFT_PIPAPO_AVX2_AND(9, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 15, pg[15], bsize);
+ NFT_PIPAPO_AVX2_AND(11, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 16, pg[16], bsize);
+ NFT_PIPAPO_AVX2_AND(13, 6, 7);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 17, pg[17], bsize);
+
+ NFT_PIPAPO_AVX2_AND(0, 8, 9);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 18, pg[18], bsize);
+ NFT_PIPAPO_AVX2_AND(2, 10, 11);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 19, pg[19], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 12, 13);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 20, pg[20], bsize);
+ NFT_PIPAPO_AVX2_AND(6, 14, 0);
+ NFT_PIPAPO_AVX2_AND(7, 1, 2);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 21, pg[21], bsize);
+ NFT_PIPAPO_AVX2_AND(9, 3, 4);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 22, pg[22], bsize);
+ NFT_PIPAPO_AVX2_AND(11, 5, 6);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 23, pg[23], bsize);
+ NFT_PIPAPO_AVX2_AND(13, 7, 8);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 24, pg[24], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 25, pg[25], bsize);
+ NFT_PIPAPO_AVX2_AND(1, 9, 10);
+ NFT_PIPAPO_AVX2_AND(2, 11, 12);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 26, pg[26], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 13, 14);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 27, pg[27], bsize);
+ NFT_PIPAPO_AVX2_AND(6, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 28, pg[28], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 29, pg[29], bsize);
+ NFT_PIPAPO_AVX2_AND(9, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 30, pg[30], bsize);
+ NFT_PIPAPO_AVX2_AND(11, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 31, pg[31], bsize);
+
+ NFT_PIPAPO_AVX2_AND(0, 6, 7);
+ NFT_PIPAPO_AVX2_AND(1, 8, 9);
+ NFT_PIPAPO_AVX2_AND(2, 10, 11);
+ NFT_PIPAPO_AVX2_AND(3, 12, 0);
+
+ /* Stalls */
+ NFT_PIPAPO_AVX2_AND(4, 1, 2);
+ NFT_PIPAPO_AVX2_AND(5, 3, 4);
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 5);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 8-bit fields (e.g. protocol numbers).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 0, pkt[0], bsize);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
+ NFT_PIPAPO_AVX2_AND(2, 0, 1);
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
+ }
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 2);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 16-bit fields (e.g. ports).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ } else {
+ NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(3, 0, 1);
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
+ NFT_PIPAPO_AVX2_AND(4, 3, 2);
+ }
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 32-bit fields (e.g. IPv4 addresses).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ NFT_PIPAPO_AVX2_AND(5, 2, 3);
+ NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
+
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(7, 4, 5);
+ NFT_PIPAPO_AVX2_AND(0, 6, 7);
+ }
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 0);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 48-bit fields (e.g. MAC addresses/EUI-48).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (first) {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
+
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize);
+ NFT_PIPAPO_AVX2_AND(7, 2, 3);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_AND(1, 6, 7);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ } else {
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
+
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
+
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 4, pkt[4], bsize);
+ NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 5, pkt[5], bsize);
+ NFT_PIPAPO_AVX2_AND(2, 6, 7);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(3, 0, 1);
+ NFT_PIPAPO_AVX2_AND(4, 2, 3);
+ }
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * See nft_pipapo_avx2_lookup_4b_2().
+ *
+ * This is used for 128-bit fields (e.g. IPv6 addresses).
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
+ unsigned long *lt = f->lt, bsize = f->bsize;
+
+ lt += offset * NFT_PIPAPO_LONGS_PER_M256;
+ for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
+ int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
+
+ if (!first)
+ NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
+ if (!first) {
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
+ NFT_PIPAPO_AVX2_AND(1, 1, 0);
+ }
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 4, pkt[4], bsize);
+ NFT_PIPAPO_AVX2_AND(6, 1, 2);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 5, pkt[5], bsize);
+ NFT_PIPAPO_AVX2_AND(0, 3, 4);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 6, pkt[6], bsize);
+
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 7, pkt[7], bsize);
+ NFT_PIPAPO_AVX2_AND(3, 5, 6);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize);
+
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize);
+ NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
+ NFT_PIPAPO_AVX2_AND(2, 6, 7);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 12, pkt[12], bsize);
+ NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 13, pkt[13], bsize);
+ NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 14, pkt[14], bsize);
+ NFT_PIPAPO_AVX2_AND(2, 6, 7);
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 15, pkt[15], bsize);
+ NFT_PIPAPO_AVX2_AND(4, 0, 1);
+
+ /* Stall */
+ NFT_PIPAPO_AVX2_AND(5, 2, 3);
+ NFT_PIPAPO_AVX2_AND(6, 4, 5);
+
+ NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch);
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 6);
+
+ b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
+ if (last)
+ return b;
+
+ if (unlikely(ret == -1))
+ ret = b / XSAVE_YMM_SIZE;
+
+ continue;
+
+nomatch:
+ NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
+nothing:
+ ;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
+ * @map: Previous match result, used as initial bitmap
+ * @fill: Destination bitmap to be filled with current match result
+ * @f: Field, containing lookup and mapping tables
+ * @offset: Ignore buckets before the given index, no bits are filled there
+ * @pkt: Packet data, pointer to input nftables register
+ * @first: If this is the first field, don't source previous result
+ * @last: Last field: stop at the first match and return bit index
+ *
+ * This function should never be called in practice: it's provided in case the
+ * field size doesn't match any of the known data types. The matching rate is
+ * substantially lower than that of the AVX2 routines.
+ *
+ * Return: -1 on no match, rule index of match if @last, otherwise first long
+ * word index to be checked next (i.e. first filled word).
+ */
+static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
+ struct nft_pipapo_field *f, int offset,
+ const u8 *pkt, bool first, bool last)
+{
+ unsigned long bsize = f->bsize;
+ int i, ret = -1, b;
+
+ if (first)
+ memset(map, 0xff, bsize * sizeof(*map));
+
+ for (i = offset; i < bsize; i++) {
+ if (f->bb == 8)
+ pipapo_and_field_buckets_8bit(f, map, pkt);
+ else
+ pipapo_and_field_buckets_4bit(f, map, pkt);
+ NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
+
+ b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last);
+
+ if (last)
+ return b;
+
+ if (ret == -1)
+ ret = b / XSAVE_YMM_SIZE;
+ }
+
+ return ret;
+}
+
+/**
+ * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity
+ * @desc: Set description, element count and field description used
+ * @features: Flags: NFT_SET_INTERVAL needs to be there
+ * @est: Storage for estimation data
+ *
+ * Return: true if set is compatible and AVX2 available, false otherwise.
+ */
+bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ if (!(features & NFT_SET_INTERVAL) ||
+ desc->field_count < NFT_PIPAPO_MIN_FIELDS)
+ return false;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX))
+ return false;
+
+ est->size = pipapo_estimate_size(desc);
+ if (!est->size)
+ return false;
+
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
+
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+/**
+ * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
+ * @net: Network namespace
+ * @set: nftables API set representation
+ * @key: nftables API element representation containing key data
+ * @ext: nftables API extension pointer, filled with matching reference
+ *
+ * For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
+ *
+ * This implementation exploits the repetitive characteristic of the algorithm
+ * to provide a fast, vectorised version using the AVX2 SIMD instruction set.
+ *
+ * Return: true on match, false otherwise.
+ */
+bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ unsigned long *res, *fill, *scratch;
+ u8 genmask = nft_genmask_cur(net);
+ const u8 *rp = (const u8 *)key;
+ struct nft_pipapo_match *m;
+ struct nft_pipapo_field *f;
+ bool map_index;
+ int i, ret = 0;
+
+ if (unlikely(!irq_fpu_usable()))
+ return nft_pipapo_lookup(net, set, key, ext);
+
+ m = rcu_dereference(priv->match);
+
+ /* This also protects access to all data related to scratch maps.
+ *
+ * Note that we don't need a valid MXCSR state for any of the
+ * operations we use here, so pass 0 as mask and spare a LDMXCSR
+ * instruction.
+ */
+ kernel_fpu_begin_mask(0);
+
+ scratch = *raw_cpu_ptr(m->scratch_aligned);
+ if (unlikely(!scratch)) {
+ kernel_fpu_end();
+ return false;
+ }
+ map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
+
+ res = scratch + (map_index ? m->bsize_max : 0);
+ fill = scratch + (map_index ? 0 : m->bsize_max);
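+
+	/* res and fill are the two halves of the per-CPU scratch area: one
+	 * holds the current result, the other is filled for the next field,
+	 * and the two are swapped after each field
+	 */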
+
+ /* Starting map doesn't need to be set for this implementation */
+
+ nft_pipapo_avx2_prepare();
+
+next_match:
+ nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1, first = !i;
+
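+		/* Use the lookup routine unrolled for this group width and
+		 * count, if one exists, and the generic slow routine otherwise
+		 */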
+#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
+ (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
+ ret, rp, \
+ first, last))
+
+ if (likely(f->bb == 8)) {
+ if (f->groups == 1) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1);
+ } else if (f->groups == 2) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2);
+ } else if (f->groups == 4) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4);
+ } else if (f->groups == 6) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6);
+ } else if (f->groups == 16) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
+ } else {
+ ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret, rp,
+ first, last);
+ }
+ } else {
+ if (f->groups == 2) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2);
+ } else if (f->groups == 4) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4);
+ } else if (f->groups == 8) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8);
+ } else if (f->groups == 12) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12);
+ } else if (f->groups == 32) {
+ NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
+ } else {
+ ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret, rp,
+ first, last);
+ }
+ }
+ NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
+
+#undef NFT_SET_PIPAPO_AVX2_LOOKUP
+
+ if (ret < 0)
+ goto out;
+
+ if (last) {
+ *ext = &f->mt[ret].e->ext;
+ if (unlikely(nft_set_elem_expired(*ext) ||
+ !nft_set_elem_active(*ext, genmask))) {
+ ret = 0;
+ goto next_match;
+ }
+
+ goto out;
+ }
+
+ swap(res, fill);
+ rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
+ }
+
+out:
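+	/* After an odd number of fields, the latest result is in the other
+	 * half of the scratch area: remember which half to start from next
+	 */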
+ if (i % 2)
+ raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ kernel_fpu_end();
+
+ return ret >= 0;
+}
diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h
new file mode 100644
index 000000000..dbb6aaca8
--- /dev/null
+++ b/net/netfilter/nft_set_pipapo_avx2.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _NFT_SET_PIPAPO_AVX2_H
+#define _NFT_SET_PIPAPO_AVX2_H
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+#include <asm/fpu/xstate.h>
+#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE)
+
+bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est);
+#endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */
+
+#endif /* _NFT_SET_PIPAPO_AVX2_H */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
new file mode 100644
index 000000000..e34662f4a
--- /dev/null
+++ b/net/netfilter/nft_set_rbtree.c
@@ -0,0 +1,773 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
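+/* Insertions and removals are serialized by @lock; lockless readers detect
+ * concurrent updates via the @count sequence counter and retry under @lock
+ */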
+struct nft_rbtree {
+ struct rb_root root;
+ rwlock_t lock;
+ seqcount_rwlock_t count;
+ struct delayed_work gc_work;
+};
+
+struct nft_rbtree_elem {
+ struct rb_node node;
+ struct nft_set_ext ext;
+};
+
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+{
+ return !nft_rbtree_interval_end(rbe);
+}
+
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
+{
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
+}
+
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ const struct nft_rbtree_elem *rbe, *interval = NULL;
+ u8 genmask = nft_genmask_cur(net);
+ const struct rb_node *parent;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+ while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ if (interval &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
+ nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_start(interval))
+ continue;
+ interval = rbe;
+ } else if (d > 0)
+ parent = rcu_dereference_raw(parent->rb_right);
+ else {
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ }
+
+ if (nft_rbtree_elem_expired(rbe))
+ return false;
+
+ if (nft_rbtree_interval_end(rbe)) {
+ if (nft_set_is_anonymous(set))
+ return false;
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = NULL;
+ continue;
+ }
+
+ *ext = &rbe->ext;
+ return true;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_elem_expired(interval) &&
+ nft_rbtree_interval_start(interval)) {
+ *ext = &interval->ext;
+ return true;
+ }
+
+ return false;
+}
+
+INDIRECT_CALLABLE_SCOPE
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
+static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const u32 *key, struct nft_rbtree_elem **elem,
+ unsigned int seq, unsigned int flags, u8 genmask)
+{
+ struct nft_rbtree_elem *rbe, *interval = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent;
+ const void *this;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+ while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ if (!(flags & NFT_SET_ELEM_INTERVAL_END))
+ interval = rbe;
+ } else if (d > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ if (flags & NFT_SET_ELEM_INTERVAL_END)
+ interval = rbe;
+ } else {
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ }
+
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
+ if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+ (flags & NFT_SET_ELEM_INTERVAL_END)) {
+ *elem = rbe;
+ return true;
+ }
+
+ if (nft_rbtree_interval_end(rbe))
+ interval = NULL;
+
+ parent = rcu_dereference_raw(parent->rb_left);
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
+ ((!nft_rbtree_interval_end(interval) &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
+ (nft_rbtree_interval_end(interval) &&
+ (flags & NFT_SET_ELEM_INTERVAL_END)))) {
+ *elem = interval;
+ return true;
+ }
+
+ return false;
+}
+
+static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
+ const u32 *key = (const u32 *)&elem->key.val;
+ u8 genmask = nft_genmask_cur(net);
+ bool ret;
+
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return rbe;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (!ret)
+ rbe = ERR_PTR(-ENOENT);
+ read_unlock_bh(&priv->lock);
+
+ return rbe;
+}
+
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
+static const struct nft_rbtree_elem *
+nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe, u8 genmask)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
+ return ERR_PTR(-ENOMEM);
+
+	/* Search for the end interval coming before this element.
+	 * End intervals don't carry a timeout extension; they
+	 * are coupled with the interval start element.
+	 */
+ while (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
+ break;
+
+ prev = rb_prev(prev);
+ }
+
+ rbe_prev = NULL;
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+		/* There is always room in this trans gc for this element,
+		 * memory allocation never actually happens, hence the warning
+		 * splat in such a case. No need to set NFT_SET_ELEM_DEAD_BIT,
+		 * this is synchronous gc which never fails.
+		 */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return ERR_PTR(-ENOMEM);
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
+ }
+
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return ERR_PTR(-ENOMEM);
+
+ nft_trans_gc_elem_add(gc, rbe);
+
+ nft_trans_gc_queue_sync_done(gc);
+
+ return rbe_prev;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * update the first element for the node list path.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
+static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
+{
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *next, *parent, **p, *first = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
+ u8 genmask = nft_genmask_next(net);
+ int d;
+
+	/* Descend the tree to search for an existing element whose key value
+	 * is greater than that of the new element. This is the first element
+	 * from which to walk the ordered elements and find possible overlaps.
+	 */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0) {
+ p = &parent->rb_left;
+ } else if (d > 0) {
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
+
+ p = &parent->rb_right;
+ } else {
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = next) {
+ next = rb_next(node);
+
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (nft_set_elem_expired(&rbe->ext) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
+ const struct nft_rbtree_elem *removed_end;
+
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ if (IS_ERR(removed_end))
+ return PTR_ERR(removed_end);
+
+ if (removed_end == rbe_le || removed_end == rbe_ge)
+ return -EAGAIN;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+			/* First element that is greater or equal to the key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+			/* This is a closer greater-or-equal element, update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+			/* Element is equal to the key value, make sure flags
+			 * are the same: an existing greater-or-equal start
+			 * element must not be replaced by a greater-or-equal
+			 * end element.
+			 */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
+ }
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
+ }
+ }
+
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
+ }
+
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+	/* - new start element with existing closest, less or equal key value
+	 * being a start element: partial overlap, reported as -ENOTEMPTY.
+	 * Anonymous sets allow for two consecutive start elements since they
+	 * are constant, skip them to avoid bogus overlap reports.
+	 */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+	/* - new end element with existing closest, less or equal key value
+	 * being an end element: partial overlap, reported as -ENOTEMPTY.
+	 */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ rb_link_node_rcu(&new->node, parent, p);
+ rb_insert_color(&new->node, &priv->root);
+ return 0;
+}
+
+static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe = elem->priv;
+ int err;
+
+ do {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+ } while (err == -EAGAIN);
+
+ return err;
+}
+
+static void nft_rbtree_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+}
+
+static void nft_rbtree_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(net, set, &rbe->ext);
+}
+
+static bool nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
+{
+ struct nft_rbtree_elem *rbe = priv;
+
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
+}
+
+static void *nft_rbtree_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ const struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent = priv->root.rb_node;
+ struct nft_rbtree_elem *rbe, *this = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ int d;
+
+ while (parent != NULL) {
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+ set->klen);
+ if (d < 0)
+ parent = parent->rb_left;
+ else if (d > 0)
+ parent = parent->rb_right;
+ else {
+ if (nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_start(this)) {
+ parent = parent->rb_left;
+ continue;
+ } else if (nft_rbtree_interval_start(rbe) &&
+ nft_rbtree_interval_end(this)) {
+ parent = parent->rb_right;
+ continue;
+ } else if (nft_set_elem_expired(&rbe->ext)) {
+ break;
+ } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ nft_rbtree_flush(net, set, rbe);
+ return rbe;
+ }
+ }
+ return NULL;
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct nft_set_elem elem;
+ struct rb_node *node;
+
+ read_lock_bh(&priv->lock);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = rbe;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0) {
+ read_unlock_bh(&priv->lock);
+ return;
+ }
+cont:
+ iter->count++;
+ }
+ read_unlock_bh(&priv->lock);
+}
+
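+/* Periodic garbage collection: walk the tree under the read lock, mark
+ * expired elements (and their paired end elements) dead and queue them
+ * for asynchronous removal.
+ */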
+static void nft_rbtree_gc(struct work_struct *work)
+{
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
+ struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
+ struct rb_node *node;
+ struct nft_set *set;
+ unsigned int gc_seq;
+ struct net *net;
+
+ priv = container_of(work, struct nft_rbtree, gc_work.work);
+ set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
+
+ read_lock_bh(&priv->lock);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
+
+		/* Elements are stored in reversed order in the rbtree for
+		 * historical reasons, from highest to lowest value; that is
+		 * why the end element is always visited before the start
+		 * element.
+		 */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_end = rbe;
+ continue;
+ }
+ if (!nft_set_elem_expired(&rbe->ext))
+ continue;
+
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
+ continue;
+
+ nft_set_elem_dead(&rbe_end->ext);
+
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
+ }
+
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
+
+try_later:
+ read_unlock_bh(&priv->lock);
+
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
+{
+ return sizeof(struct nft_rbtree);
+}
+
+static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ rwlock_init(&priv->lock);
+ seqcount_rwlock_init(&priv->count, &priv->lock);
+ priv->root = RB_ROOT;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+
+ return 0;
+}
+
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *node;
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
+ while ((node = priv->root.rb_node) != NULL) {
+ rb_erase(node, &priv->root);
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
+ }
+}
+
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ if (desc->field_count > 1)
+ return false;
+
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) +
+ desc->size * sizeof(struct nft_rbtree_elem);
+ else
+ est->size = ~0;
+
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
+ est->space = NFT_SET_CLASS_O_N;
+
+ return true;
+}
+
+const struct nft_set_type nft_set_rbtree_type = {
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+ .ops = {
+ .privsize = nft_rbtree_privsize,
+ .elemsize = offsetof(struct nft_rbtree_elem, ext),
+ .estimate = nft_rbtree_estimate,
+ .init = nft_rbtree_init,
+ .destroy = nft_rbtree_destroy,
+ .insert = nft_rbtree_insert,
+ .remove = nft_rbtree_remove,
+ .deactivate = nft_rbtree_deactivate,
+ .flush = nft_rbtree_flush,
+ .activate = nft_rbtree_activate,
+ .lookup = nft_rbtree_lookup,
+ .walk = nft_rbtree_walk,
+ .get = nft_rbtree_get,
+ },
+};
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
new file mode 100644
index 000000000..f28324fd8
--- /dev/null
+++ b/net/netfilter/nft_socket.c
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_socket.h>
+#include <net/inet_sock.h>
+#include <net/tcp.h>
+
+struct nft_socket {
+ enum nft_socket_keys key:8;
+ u8 level;
+ u8 len;
+ union {
+ u8 dreg;
+ };
+};
+
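+/* Store 1 in the destination register if the socket is bound to the
+ * wildcard address (0.0.0.0 or ::), 0 otherwise.
+ */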
+static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
+ struct nft_regs *regs, struct sock *sk,
+ u32 *dest)
+{
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr));
+ break;
+#endif
+ default:
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+}
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static noinline bool
+nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
+{
+ struct cgroup *cgrp;
+ u64 cgid;
+
+ if (!sk_fullsock(sk))
+ return false;
+
+ cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
+ if (!cgrp)
+ return false;
+
+ cgid = cgroup_id(cgrp);
+ memcpy(dest, &cgid, sizeof(u64));
+ return true;
+}
+#endif
+
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
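+/* Use the socket already attached to the skb if it belongs to this netns,
+ * otherwise fall back to a lookup based on the packet addresses.
+ */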
+static void nft_socket_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ struct sock *sk = skb->sk;
+ u32 *dest = &regs->data[priv->dreg];
+
+ if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+ sk = NULL;
+
+ if (!sk)
+ sk = nft_socket_do_lookup(pkt);
+
+ if (!sk) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+	switch (priv->key) {
+ case NFT_SOCKET_TRANSPARENT:
+ nft_reg_store8(dest, inet_sk_transparent(sk));
+ break;
+ case NFT_SOCKET_MARK:
+ if (sk_fullsock(sk)) {
+ *dest = READ_ONCE(sk->sk_mark);
+ } else {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ break;
+ case NFT_SOCKET_WILDCARD:
+ if (!sk_fullsock(sk)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ nft_socket_wildcard(pkt, regs, sk, dest);
+ break;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case NFT_SOCKET_CGROUPV2:
+ if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ regs->verdict.code = NFT_BREAK;
+ }
+
+ if (sk != skb->sk)
+ sock_gen_put(sk);
+}
+
+static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
+ [NFTA_SOCKET_KEY] = { .type = NLA_U32 },
+ [NFTA_SOCKET_DREG] = { .type = NLA_U32 },
+ [NFTA_SOCKET_LEVEL] = { .type = NLA_U32 },
+};
+
+static int nft_socket_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_socket *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
+ return -EINVAL;
+
+	switch (ctx->family) {
+ case NFPROTO_IPV4:
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+#endif
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY]));
+	switch (priv->key) {
+ case NFT_SOCKET_TRANSPARENT:
+ case NFT_SOCKET_WILDCARD:
+ len = sizeof(u8);
+ break;
+ case NFT_SOCKET_MARK:
+ len = sizeof(u32);
+ break;
+#ifdef CONFIG_CGROUPS
+ case NFT_SOCKET_CGROUPV2: {
+ unsigned int level;
+
+ if (!tb[NFTA_SOCKET_LEVEL])
+ return -EINVAL;
+
+ level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL]));
+ if (level > 255)
+ return -EOPNOTSUPP;
+
+ priv->level = level;
+ len = sizeof(u64);
+ break;
+ }
+#endif
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->len = len;
+ return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+}
+
+static int nft_socket_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
+ return -1;
+ if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
+ return -1;
+ if (priv->key == NFT_SOCKET_CGROUPV2 &&
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ return -1;
+ return 0;
+}
+
+static bool nft_socket_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+ const struct nft_socket *socket;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ socket = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != socket->key ||
+ priv->dreg != socket->dreg ||
+ priv->level != socket->level) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
+static struct nft_expr_type nft_socket_type;
+static const struct nft_expr_ops nft_socket_ops = {
+ .type = &nft_socket_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)),
+ .eval = nft_socket_eval,
+ .init = nft_socket_init,
+ .dump = nft_socket_dump,
+ .validate = nft_socket_validate,
+ .reduce = nft_socket_reduce,
+};
+
+static struct nft_expr_type nft_socket_type __read_mostly = {
+ .name = "socket",
+ .ops = &nft_socket_ops,
+ .policy = nft_socket_policy,
+ .maxattr = NFTA_SOCKET_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_socket_module_init(void)
+{
+ return nft_register_expr(&nft_socket_type);
+}
+
+static void __exit nft_socket_module_exit(void)
+{
+ nft_unregister_expr(&nft_socket_type);
+}
+
+module_init(nft_socket_module_init);
+module_exit(nft_socket_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Máté Eckl");
+MODULE_DESCRIPTION("nf_tables socket match module");
+MODULE_ALIAS_NFT_EXPR("socket");
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
new file mode 100644
index 000000000..a450f28a5
--- /dev/null
+++ b/net/netfilter/nft_synproxy.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/netlink.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_synproxy.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_synproxy.h>
+
+struct nft_synproxy {
+ struct nf_synproxy_info info;
+};
+
+static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = {
+ [NFTA_SYNPROXY_MSS] = { .type = NLA_U16 },
+ [NFTA_SYNPROXY_WSCALE] = { .type = NLA_U8 },
+ [NFTA_SYNPROXY_FLAGS] = { .type = NLA_U32 },
+};
+
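+/* Mask the client's TCP options against the configured ones; the client's
+ * MSS is saved in mss_encode so it can be carried in the timestamp cookie.
+ */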
+static void nft_synproxy_tcp_options(struct synproxy_options *opts,
+ const struct tcphdr *tcp,
+ struct synproxy_net *snet,
+ struct nf_synproxy_info *info,
+ const struct nft_synproxy *priv)
+{
+ this_cpu_inc(snet->stats->syn_received);
+ if (tcp->ece && tcp->cwr)
+ opts->options |= NF_SYNPROXY_OPT_ECN;
+
+ opts->options &= priv->info.options;
+ opts->mss_encode = opts->mss_option;
+ opts->mss_option = info->mss;
+ if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, opts);
+ else
+ opts->options &= ~(NF_SYNPROXY_OPT_WSCALE |
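+/* Fast path: lockless lookup under RCU. If a writer raced with this
+ * lookup, retry once more under the read lock, where no race is possible.
+ */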
+ NF_SYNPROXY_OPT_SACK_PERM |
+ NF_SYNPROXY_OPT_ECN);
+}
+
+static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ const struct tcphdr *tcp,
+ struct tcphdr *_tcph,
+ struct synproxy_options *opts)
+{
+ struct nf_synproxy_info info = priv->info;
+ struct net *net = nft_net(pkt);
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct sk_buff *skb = pkt->skb;
+
+ if (tcp->syn) {
+ /* Initial SYN from client */
+ nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ synproxy_send_client_synack(net, skb, tcp, opts);
+ consume_skb(skb);
+ regs->verdict.code = NF_STOLEN;
+ } else if (tcp->ack) {
+ /* ACK from client */
+ if (synproxy_recv_client_ack(net, skb, tcp, opts,
+ ntohl(tcp->seq))) {
+ consume_skb(skb);
+ regs->verdict.code = NF_STOLEN;
+ } else {
+ regs->verdict.code = NF_DROP;
+ }
+ }
+}
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ const struct tcphdr *tcp,
+ struct tcphdr *_tcph,
+ struct synproxy_options *opts)
+{
+ struct nf_synproxy_info info = priv->info;
+ struct net *net = nft_net(pkt);
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct sk_buff *skb = pkt->skb;
+
+ if (tcp->syn) {
+ /* Initial SYN from client */
+ nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ synproxy_send_client_synack_ipv6(net, skb, tcp, opts);
+ consume_skb(skb);
+ regs->verdict.code = NF_STOLEN;
+ } else if (tcp->ack) {
+ /* ACK from client */
+ if (synproxy_recv_client_ack_ipv6(net, skb, tcp, opts,
+ ntohl(tcp->seq))) {
+ consume_skb(skb);
+ regs->verdict.code = NF_STOLEN;
+ } else {
+ regs->verdict.code = NF_DROP;
+ }
+ }
+}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
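+/* Common evaluation path for the expression and the stateful object:
+ * validate the TCP checksum and header, parse the client's TCP options,
+ * then branch on the L3 protocol.
+ */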
+static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct synproxy_options opts = {};
+ struct sk_buff *skb = pkt->skb;
+ int thoff = nft_thoff(pkt);
+ const struct tcphdr *tcp;
+ struct tcphdr _tcph;
+
+ if (pkt->tprot != IPPROTO_TCP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ if (nf_ip_checksum(skb, nft_hook(pkt), thoff, IPPROTO_TCP)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ tcp = skb_header_pointer(skb, thoff,
+ sizeof(struct tcphdr),
+ &_tcph);
+ if (!tcp) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ if (!synproxy_parse_options(skb, thoff, tcp, &opts)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts);
+ return;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case htons(ETH_P_IPV6):
+ nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts);
+ return;
+#endif
+ }
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_synproxy_do_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_synproxy *priv)
+{
+ struct synproxy_net *snet = synproxy_pernet(ctx->net);
+ u32 flags;
+ int err;
+
+ if (tb[NFTA_SYNPROXY_MSS])
+ priv->info.mss = ntohs(nla_get_be16(tb[NFTA_SYNPROXY_MSS]));
+ if (tb[NFTA_SYNPROXY_WSCALE])
+ priv->info.wscale = nla_get_u8(tb[NFTA_SYNPROXY_WSCALE]);
+ if (tb[NFTA_SYNPROXY_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_SYNPROXY_FLAGS]));
+ if (flags & ~NF_SYNPROXY_OPT_MASK)
+ return -EOPNOTSUPP;
+ priv->info.options = flags;
+ }
+
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err)
+ return err;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ err = nf_synproxy_ipv4_init(snet, ctx->net);
+ if (err)
+ goto nf_ct_failure;
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ err = nf_synproxy_ipv6_init(snet, ctx->net);
+ if (err)
+ goto nf_ct_failure;
+ break;
+#endif
+ case NFPROTO_INET:
+ err = nf_synproxy_ipv4_init(snet, ctx->net);
+ if (err)
+ goto nf_ct_failure;
+ err = nf_synproxy_ipv6_init(snet, ctx->net);
+ if (err) {
+ nf_synproxy_ipv4_fini(snet, ctx->net);
+ goto nf_ct_failure;
+ }
+ break;
+ }
+
+ return 0;
+
+nf_ct_failure:
+ nf_ct_netns_put(ctx->net, ctx->family);
+ return err;
+}
+
+static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
+{
+ struct synproxy_net *snet = synproxy_pernet(ctx->net);
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ nf_synproxy_ipv4_fini(snet, ctx->net);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ nf_synproxy_ipv6_fini(snet, ctx->net);
+ break;
+#endif
+ case NFPROTO_INET:
+ nf_synproxy_ipv4_fini(snet, ctx->net);
+ nf_synproxy_ipv6_fini(snet, ctx->net);
+ break;
+ }
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv)
+{
+ if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) ||
+ nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) ||
+ nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_synproxy_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static int nft_synproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD));
+}
+
+static int nft_synproxy_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ return nft_synproxy_do_dump(skb, priv);
+}
+
+static struct nft_expr_type nft_synproxy_type;
+static const struct nft_expr_ops nft_synproxy_ops = {
+ .eval = nft_synproxy_eval,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_synproxy)),
+ .init = nft_synproxy_init,
+ .destroy = nft_synproxy_destroy,
+ .dump = nft_synproxy_dump,
+ .type = &nft_synproxy_type,
+ .validate = nft_synproxy_validate,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static struct nft_expr_type nft_synproxy_type __read_mostly = {
+ .ops = &nft_synproxy_ops,
+ .name = "synproxy",
+ .owner = THIS_MODULE,
+ .policy = nft_synproxy_policy,
+ .maxattr = NFTA_SYNPROXY_MAX,
+};
+
+static int nft_synproxy_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ return nft_synproxy_do_dump(skb, priv);
+}
+
+static void nft_synproxy_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_synproxy *priv = nft_obj_data(obj);
+
+ nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static void nft_synproxy_obj_update(struct nft_object *obj,
+ struct nft_object *newobj)
+{
+ struct nft_synproxy *newpriv = nft_obj_data(newobj);
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ priv->info = newpriv->info;
+}
+
+static struct nft_object_type nft_synproxy_obj_type;
+static const struct nft_object_ops nft_synproxy_obj_ops = {
+ .type = &nft_synproxy_obj_type,
+ .size = sizeof(struct nft_synproxy),
+ .init = nft_synproxy_obj_init,
+ .destroy = nft_synproxy_obj_destroy,
+ .dump = nft_synproxy_obj_dump,
+ .eval = nft_synproxy_obj_eval,
+ .update = nft_synproxy_obj_update,
+};
+
+static struct nft_object_type nft_synproxy_obj_type __read_mostly = {
+ .type = NFT_OBJECT_SYNPROXY,
+ .ops = &nft_synproxy_obj_ops,
+ .maxattr = NFTA_SYNPROXY_MAX,
+ .policy = nft_synproxy_policy,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_synproxy_module_init(void)
+{
+ int err;
+
+ err = nft_register_obj(&nft_synproxy_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_synproxy_type);
+ if (err < 0)
+ goto err;
+
+ return 0;
+
+err:
+ nft_unregister_obj(&nft_synproxy_obj_type);
+ return err;
+}
+
+static void __exit nft_synproxy_module_exit(void)
+{
+ nft_unregister_expr(&nft_synproxy_type);
+ nft_unregister_obj(&nft_synproxy_obj_type);
+}
+
+module_init(nft_synproxy_module_init);
+module_exit(nft_synproxy_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
+MODULE_ALIAS_NFT_EXPR("synproxy");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
new file mode 100644
index 000000000..adb50c395
--- /dev/null
+++ b/net/netfilter/nft_tproxy.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tproxy.h>
+#include <net/inet_sock.h>
+#include <net/tcp.h>
+#include <linux/if_ether.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+
+struct nft_tproxy {
+ u8 sreg_addr;
+ u8 sreg_port;
+ u8 family;
+};
+
+static void nft_tproxy_eval_v4(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct udphdr _hdr, *hp;
+ __be32 taddr = 0;
+ __be16 tport = 0;
+ struct sock *sk;
+
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+ if (!hp) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+	/* Check if there's an ongoing connection on the packet addresses; this
+	 * happens if the redirect already happened and the current packet
+	 * belongs to an already established connection.
+	 */
+ sk = nf_tproxy_get_sock_v4(nft_net(pkt), skb, iph->protocol,
+ iph->saddr, iph->daddr,
+ hp->source, hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
+
+ if (priv->sreg_addr)
+ taddr = nft_reg_load_be32(&regs->data[priv->sreg_addr]);
+ taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr);
+
+ if (priv->sreg_port)
+ tport = nft_reg_load_be16(&regs->data[priv->sreg_port]);
+ if (!tport)
+ tport = hp->dest;
+
+ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+ if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ /* reopening a TIME_WAIT connection needs special handling */
+ sk = nf_tproxy_handle_time_wait4(nft_net(pkt), skb, taddr, tport, sk);
+ } else if (!sk) {
+ /* no, there's no established connection, check if
+ * there's a listener on the redirected addr/port
+ */
+ sk = nf_tproxy_get_sock_v4(nft_net(pkt), skb, iph->protocol,
+ iph->saddr, taddr,
+ hp->source, tport,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ }
+
+ if (sk && nf_tproxy_sk_is_transparent(sk))
+ nf_tproxy_assign_sock(skb, sk);
+ else
+ regs->verdict.code = NFT_BREAK;
+}
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+static void nft_tproxy_eval_v6(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ int thoff = nft_thoff(pkt);
+ struct udphdr _hdr, *hp;
+ struct in6_addr taddr;
+ __be16 tport = 0;
+ struct sock *sk;
+ int l4proto;
+
+ memset(&taddr, 0, sizeof(taddr));
+
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ l4proto = pkt->tprot;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+	/* Check if there's an ongoing connection on the packet addresses; this
+	 * happens if the redirect already happened and the current packet
+	 * belongs to an already established connection.
+	 */
+ sk = nf_tproxy_get_sock_v6(nft_net(pkt), skb, thoff, l4proto,
+ &iph->saddr, &iph->daddr,
+ hp->source, hp->dest,
+ nft_in(pkt), NF_TPROXY_LOOKUP_ESTABLISHED);
+
+ if (priv->sreg_addr)
+ memcpy(&taddr, &regs->data[priv->sreg_addr], sizeof(taddr));
+ taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr);
+
+ if (priv->sreg_port)
+ tport = nft_reg_load_be16(&regs->data[priv->sreg_port]);
+ if (!tport)
+ tport = hp->dest;
+
+ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+ if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ /* reopening a TIME_WAIT connection needs special handling */
+ sk = nf_tproxy_handle_time_wait6(skb, l4proto, thoff,
+ nft_net(pkt),
+ &taddr,
+ tport,
+ sk);
+ } else if (!sk) {
+		/* no, there's no established connection, check if
+ * there's a listener on the redirected addr/port
+ */
+ sk = nf_tproxy_get_sock_v6(nft_net(pkt), skb, thoff,
+ l4proto, &iph->saddr, &taddr,
+ hp->source, tport,
+ nft_in(pkt), NF_TPROXY_LOOKUP_LISTENER);
+ }
+
+ /* NOTE: assign_sock consumes our sk reference */
+ if (sk && nf_tproxy_sk_is_transparent(sk))
+ nf_tproxy_assign_sock(skb, sk);
+ else
+ regs->verdict.code = NFT_BREAK;
+}
+#endif
+
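+/* Dispatch on the actual packet family: a family-specific configuration
+ * only handles its own family, NFPROTO_UNSPEC handles both.
+ */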
+static void nft_tproxy_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ switch (priv->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_UNSPEC:
+ nft_tproxy_eval_v4(expr, regs, pkt);
+ return;
+ }
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ switch (priv->family) {
+ case NFPROTO_IPV6:
+ case NFPROTO_UNSPEC:
+ nft_tproxy_eval_v6(expr, regs, pkt);
+ return;
+ }
+#endif
+ }
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_tproxy_policy[NFTA_TPROXY_MAX + 1] = {
+ [NFTA_TPROXY_FAMILY] = { .type = NLA_U32 },
+ [NFTA_TPROXY_REG_ADDR] = { .type = NLA_U32 },
+ [NFTA_TPROXY_REG_PORT] = { .type = NLA_U32 },
+};
+
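+/* Parse the configuration: the tproxy family must be compatible with the
+ * table family, and defragmentation is enabled for every family that may
+ * be redirected so that socket lookups see complete transport headers.
+ */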
+static int nft_tproxy_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_tproxy *priv = nft_expr_priv(expr);
+ unsigned int alen = 0;
+ int err;
+
+ if (!tb[NFTA_TPROXY_FAMILY] ||
+ (!tb[NFTA_TPROXY_REG_ADDR] && !tb[NFTA_TPROXY_REG_PORT]))
+ return -EINVAL;
+
+ priv->family = ntohl(nla_get_be32(tb[NFTA_TPROXY_FAMILY]));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ if (priv->family != NFPROTO_IPV4)
+ return -EINVAL;
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ if (priv->family != NFPROTO_IPV6)
+ return -EINVAL;
+ break;
+#endif
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Address is specified but the rule family is not set accordingly */
+ if (priv->family == NFPROTO_UNSPEC && tb[NFTA_TPROXY_REG_ADDR])
+ return -EINVAL;
+
+ switch (priv->family) {
+ case NFPROTO_IPV4:
+ alen = sizeof_field(union nf_inet_addr, in);
+ err = nf_defrag_ipv4_enable(ctx->net);
+ if (err)
+ return err;
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ alen = sizeof_field(union nf_inet_addr, in6);
+ err = nf_defrag_ipv6_enable(ctx->net);
+ if (err)
+ return err;
+ break;
+#endif
+ case NFPROTO_UNSPEC:
+ /* No address is specified here */
+ err = nf_defrag_ipv4_enable(ctx->net);
+ if (err)
+ return err;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ err = nf_defrag_ipv6_enable(ctx->net);
+ if (err)
+ return err;
+#endif
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_TPROXY_REG_ADDR]) {
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ &priv->sreg_addr, alen);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[NFTA_TPROXY_REG_PORT]) {
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ &priv->sreg_port, sizeof(u16));
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static void nft_tproxy_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+ switch (priv->family) {
+ case NFPROTO_IPV4:
+ nf_defrag_ipv4_disable(ctx->net);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ nf_defrag_ipv6_disable(ctx->net);
+ break;
+#endif
+ case NFPROTO_UNSPEC:
+ nf_defrag_ipv4_disable(ctx->net);
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ nf_defrag_ipv6_disable(ctx->net);
+#endif
+ break;
+ }
+}
+
+static int nft_tproxy_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_TPROXY_FAMILY, htonl(priv->family)))
+ return -1;
+
+ if (priv->sreg_addr &&
+ nft_dump_register(skb, NFTA_TPROXY_REG_ADDR, priv->sreg_addr))
+ return -1;
+
+ if (priv->sreg_port &&
+ nft_dump_register(skb, NFTA_TPROXY_REG_PORT, priv->sreg_port))
+ return -1;
+
+ return 0;
+}
+
+static int nft_tproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+}
+
+static struct nft_expr_type nft_tproxy_type;
+static const struct nft_expr_ops nft_tproxy_ops = {
+ .type = &nft_tproxy_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_tproxy)),
+ .eval = nft_tproxy_eval,
+ .init = nft_tproxy_init,
+ .destroy = nft_tproxy_destroy,
+ .dump = nft_tproxy_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .validate = nft_tproxy_validate,
+};
+
+static struct nft_expr_type nft_tproxy_type __read_mostly = {
+ .name = "tproxy",
+ .ops = &nft_tproxy_ops,
+ .policy = nft_tproxy_policy,
+ .maxattr = NFTA_TPROXY_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_tproxy_module_init(void)
+{
+ return nft_register_expr(&nft_tproxy_type);
+}
+
+static void __exit nft_tproxy_module_exit(void)
+{
+ nft_unregister_expr(&nft_tproxy_type);
+}
+
+module_init(nft_tproxy_module_init);
+module_exit(nft_tproxy_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Máté Eckl");
+MODULE_DESCRIPTION("nf_tables tproxy support module");
+MODULE_ALIAS_NFT_EXPR("tproxy");
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
new file mode 100644
index 000000000..983ade4be
--- /dev/null
+++ b/net/netfilter/nft_tunnel.c
@@ -0,0 +1,750 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/dst_metadata.h>
+#include <net/ip_tunnels.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
+#include <net/geneve.h>
+
+struct nft_tunnel {
+ enum nft_tunnel_keys key:8;
+ u8 dreg;
+ enum nft_tunnel_mode mode:8;
+ u8 len;
+};
+
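+/* Fetch tunnel metadata from the packet: NFT_TUNNEL_MODE_RX only matches
+ * metadata from decapsulation, NFT_TUNNEL_MODE_TX only metadata set up
+ * for transmission, NFT_TUNNEL_MODE_NONE matches either direction.
+ */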
+static void nft_tunnel_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_tunnel *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = skb_tunnel_info(pkt->skb);
+
+ switch (priv->key) {
+ case NFT_TUNNEL_PATH:
+ if (!tun_info) {
+ nft_reg_store8(dest, false);
+ return;
+ }
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ nft_reg_store8(dest, true);
+ else
+ nft_reg_store8(dest, false);
+ break;
+ case NFT_TUNNEL_ID:
+ if (!tun_info) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+ else
+ regs->verdict.code = NFT_BREAK;
+ break;
+ default:
+ WARN_ON(1);
+ regs->verdict.code = NFT_BREAK;
+ }
+}
+
+static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
+ [NFTA_TUNNEL_KEY] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_DREG] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_MODE] = { .type = NLA_U32 },
+};
+
+static int nft_tunnel_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_tunnel *priv = nft_expr_priv(expr);
+ u32 len;
+
+ if (!tb[NFTA_TUNNEL_KEY] ||
+ !tb[NFTA_TUNNEL_DREG])
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY]));
+ switch (priv->key) {
+ case NFT_TUNNEL_PATH:
+ len = sizeof(u8);
+ break;
+ case NFT_TUNNEL_ID:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[NFTA_TUNNEL_MODE]) {
+ priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
+ if (priv->mode > NFT_TUNNEL_MODE_MAX)
+ return -EOPNOTSUPP;
+ } else {
+ priv->mode = NFT_TUNNEL_MODE_NONE;
+ }
+
+ priv->len = len;
+ return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+}
+
+static int nft_tunnel_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_tunnel *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static bool nft_tunnel_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_tunnel *priv = nft_expr_priv(expr);
+ const struct nft_tunnel *tunnel;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ tunnel = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != tunnel->key ||
+ priv->dreg != tunnel->dreg ||
+ priv->mode != tunnel->mode) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+
+static struct nft_expr_type nft_tunnel_type;
+static const struct nft_expr_ops nft_tunnel_get_ops = {
+ .type = &nft_tunnel_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_tunnel)),
+ .eval = nft_tunnel_get_eval,
+ .init = nft_tunnel_get_init,
+ .dump = nft_tunnel_get_dump,
+ .reduce = nft_tunnel_get_reduce,
+};
+
+static struct nft_expr_type nft_tunnel_type __read_mostly = {
+ .name = "tunnel",
+ .family = NFPROTO_NETDEV,
+ .ops = &nft_tunnel_get_ops,
+ .policy = nft_tunnel_policy,
+ .maxattr = NFTA_TUNNEL_MAX,
+ .owner = THIS_MODULE,
+};
+
+struct nft_tunnel_opts {
+ union {
+ struct vxlan_metadata vxlan;
+ struct erspan_metadata erspan;
+ u8 data[IP_TUNNEL_OPTS_MAX];
+ } u;
+ u32 len;
+ __be16 flags;
+};
+
+struct nft_tunnel_obj {
+ struct metadata_dst *md;
+ struct nft_tunnel_opts opts;
+};
+
+static const struct nla_policy nft_tunnel_ip_policy[NFTA_TUNNEL_KEY_IP_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_IP_SRC] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_KEY_IP_DST] = { .type = NLA_U32 },
+};
+
+static int nft_tunnel_obj_ip_init(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct ip_tunnel_info *info)
+{
+ struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP_MAX, attr,
+ nft_tunnel_ip_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_TUNNEL_KEY_IP_DST])
+ return -EINVAL;
+
+ if (tb[NFTA_TUNNEL_KEY_IP_SRC])
+ info->key.u.ipv4.src = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_SRC]);
+ if (tb[NFTA_TUNNEL_KEY_IP_DST])
+ info->key.u.ipv4.dst = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_DST]);
+
+ return 0;
+}
+
+static const struct nla_policy nft_tunnel_ip6_policy[NFTA_TUNNEL_KEY_IP6_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_IP6_SRC] = { .len = sizeof(struct in6_addr), },
+ [NFTA_TUNNEL_KEY_IP6_DST] = { .len = sizeof(struct in6_addr), },
+ [NFTA_TUNNEL_KEY_IP6_FLOWLABEL] = { .type = NLA_U32, }
+};
+
+static int nft_tunnel_obj_ip6_init(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct ip_tunnel_info *info)
+{
+ struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr,
+ nft_tunnel_ip6_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_TUNNEL_KEY_IP6_DST])
+ return -EINVAL;
+
+ if (tb[NFTA_TUNNEL_KEY_IP6_SRC]) {
+ memcpy(&info->key.u.ipv6.src,
+ nla_data(tb[NFTA_TUNNEL_KEY_IP6_SRC]),
+ sizeof(struct in6_addr));
+ }
+ if (tb[NFTA_TUNNEL_KEY_IP6_DST]) {
+ memcpy(&info->key.u.ipv6.dst,
+ nla_data(tb[NFTA_TUNNEL_KEY_IP6_DST]),
+ sizeof(struct in6_addr));
+ }
+ if (tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL])
+ info->key.label = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL]);
+
+ info->mode |= IP_TUNNEL_INFO_IPV6;
+
+ return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_vxlan_policy[NFTA_TUNNEL_KEY_VXLAN_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
+ struct nft_tunnel_opts *opts)
+{
+ struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr,
+ nft_tunnel_opts_vxlan_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_TUNNEL_KEY_VXLAN_GBP])
+ return -EINVAL;
+
+ opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
+
+ opts->len = sizeof(struct vxlan_metadata);
+ opts->flags = TUNNEL_VXLAN_OPT;
+
+ return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_ERSPAN_VERSION] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 },
+ [NFTA_TUNNEL_KEY_ERSPAN_V2_HWID] = { .type = NLA_U8 },
+};
+
+static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
+ struct nft_tunnel_opts *opts)
+{
+ struct nlattr *tb[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1];
+ uint8_t hwid, dir;
+ int err, version;
+
+ err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX,
+ attr, nft_tunnel_opts_erspan_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION])
+ return -EINVAL;
+
+ version = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION]));
+ switch (version) {
+ case ERSPAN_VERSION:
+ if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX])
+ return -EINVAL;
+
+ opts->u.erspan.u.index =
+ nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX]);
+ break;
+ case ERSPAN_VERSION2:
+ if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] ||
+ !tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID])
+ return -EINVAL;
+
+ hwid = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID]);
+ dir = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR]);
+
+ set_hwid(&opts->u.erspan.u.md2, hwid);
+ opts->u.erspan.u.md2.dir = dir;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ opts->u.erspan.version = version;
+
+ opts->len = sizeof(struct erspan_metadata);
+ opts->flags = TUNNEL_ERSPAN_OPT;
+
+ return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 },
+ [NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 },
+ [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+};
+
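+/* Geneve allows multiple options: each call appends one option at byte
+ * offset opts->len into the shared buffer, hence the cast after the
+ * pointer arithmetic below.
+ */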
+static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
+ struct nft_tunnel_opts *opts)
+{
+	struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len);
+ struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
+ int err, data_len;
+
+ err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr,
+ nft_tunnel_opts_geneve_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] ||
+ !tb[NFTA_TUNNEL_KEY_GENEVE_TYPE] ||
+ !tb[NFTA_TUNNEL_KEY_GENEVE_DATA])
+ return -EINVAL;
+
+ attr = tb[NFTA_TUNNEL_KEY_GENEVE_DATA];
+ data_len = nla_len(attr);
+ if (data_len % 4)
+ return -EINVAL;
+
+ opts->len += sizeof(*opt) + data_len;
+ if (opts->len > IP_TUNNEL_OPTS_MAX)
+ return -EINVAL;
+
+ memcpy(opt->opt_data, nla_data(attr), data_len);
+ opt->length = data_len / 4;
+ opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
+ opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
+ opts->flags = TUNNEL_GENEVE_OPT;
+
+ return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_OPTS_UNSPEC] = {
+ .strict_start_type = NFTA_TUNNEL_KEY_OPTS_GENEVE },
+ [NFTA_TUNNEL_KEY_OPTS_VXLAN] = { .type = NLA_NESTED, },
+ [NFTA_TUNNEL_KEY_OPTS_ERSPAN] = { .type = NLA_NESTED, },
+ [NFTA_TUNNEL_KEY_OPTS_GENEVE] = { .type = NLA_NESTED, },
+};
+
+static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct nft_tunnel_opts *opts)
+{
+ struct nlattr *nla;
+ __be16 type = 0;
+ int err, rem;
+
+ err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
+ nft_tunnel_opts_policy, NULL);
+ if (err < 0)
+ return err;
+
+ nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(nla)) {
+ case NFTA_TUNNEL_KEY_OPTS_VXLAN:
+ if (type)
+ return -EINVAL;
+ err = nft_tunnel_obj_vxlan_init(nla, opts);
+ if (err)
+ return err;
+ type = TUNNEL_VXLAN_OPT;
+ break;
+ case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
+ if (type)
+ return -EINVAL;
+ err = nft_tunnel_obj_erspan_init(nla, opts);
+ if (err)
+ return err;
+ type = TUNNEL_ERSPAN_OPT;
+ break;
+ case NFTA_TUNNEL_KEY_OPTS_GENEVE:
+ if (type && type != TUNNEL_GENEVE_OPT)
+ return -EINVAL;
+ err = nft_tunnel_obj_geneve_init(nla, opts);
+ if (err)
+ return err;
+ type = TUNNEL_GENEVE_OPT;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return err;
+}
+
+static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_IP] = { .type = NLA_NESTED, },
+ [NFTA_TUNNEL_KEY_IP6] = { .type = NLA_NESTED, },
+ [NFTA_TUNNEL_KEY_ID] = { .type = NLA_U32, },
+ [NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, },
+ [NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, },
+ [NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, },
+ [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, },
+ [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, },
+ [NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, },
+};
+
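+/* Build a transmit-side (IP_TUNNEL_INFO_TX) tunnel metadata template from
+ * the netlink attributes; packets matching this object later get a
+ * reference to it installed as their dst.
+ */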
+static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_tunnel_obj *priv = nft_obj_data(obj);
+ struct ip_tunnel_info info;
+ struct metadata_dst *md;
+ int err;
+
+ if (!tb[NFTA_TUNNEL_KEY_ID])
+ return -EINVAL;
+
+ memset(&info, 0, sizeof(info));
+ info.mode = IP_TUNNEL_INFO_TX;
+ info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
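+	/* key and outer checksum are requested by default; TUNNEL_NOCACHE disables dst caching for this tunnel */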
+ info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+
+ if (tb[NFTA_TUNNEL_KEY_IP]) {
+ err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
+ if (err < 0)
+ return err;
+ } else if (tb[NFTA_TUNNEL_KEY_IP6]) {
+ err = nft_tunnel_obj_ip6_init(ctx, tb[NFTA_TUNNEL_KEY_IP6], &info);
+ if (err < 0)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_TUNNEL_KEY_SPORT]) {
+ info.key.tp_src = nla_get_be16(tb[NFTA_TUNNEL_KEY_SPORT]);
+ }
+ if (tb[NFTA_TUNNEL_KEY_DPORT]) {
+ info.key.tp_dst = nla_get_be16(tb[NFTA_TUNNEL_KEY_DPORT]);
+ }
+
+ if (tb[NFTA_TUNNEL_KEY_FLAGS]) {
+ u32 tun_flags;
+
+ tun_flags = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_FLAGS]));
+ if (tun_flags & ~NFT_TUNNEL_F_MASK)
+ return -EOPNOTSUPP;
+
+ if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
+ info.key.tun_flags &= ~TUNNEL_CSUM;
+ if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
+ info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
+ info.key.tun_flags |= TUNNEL_SEQ;
+ }
+ if (tb[NFTA_TUNNEL_KEY_TOS])
+ info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
+ if (tb[NFTA_TUNNEL_KEY_TTL])
+ info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
+ else
+ info.key.ttl = U8_MAX;
+
+ if (tb[NFTA_TUNNEL_KEY_OPTS]) {
+ err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],
+ &info, &priv->opts);
+ if (err < 0)
+ return err;
+ }
+
+ md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL, GFP_KERNEL);
+ if (!md)
+ return -ENOMEM;
+
+ memcpy(&md->u.tun_info, &info, sizeof(info));
+#ifdef CONFIG_DST_CACHE
+ err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
+ if (err < 0) {
+ metadata_dst_free(md);
+ return err;
+ }
+#endif
+ ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len,
+ priv->opts.flags);
+ priv->md = md;
+
+ return 0;
+}
+
+static inline void nft_tunnel_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_tunnel_obj *priv = nft_obj_data(obj);
+ struct sk_buff *skb = pkt->skb;
+
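+	/* swap in the precomputed tunnel metadata dst; the tunnel device's
+	 * transmit path reads it to build the encapsulation headers.
+	 */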
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *) priv->md);
+ skb_dst_set(skb, (struct dst_entry *) priv->md);
+}
+
+static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info)
+{
+ struct nlattr *nest;
+
+ if (info->mode & IP_TUNNEL_INFO_IPV6) {
+ nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP6);
+ if (!nest)
+ return -1;
+
+ if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC,
+ &info->key.u.ipv6.src) < 0 ||
+ nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST,
+ &info->key.u.ipv6.dst) < 0 ||
+ nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL,
+ info->key.label)) {
+ nla_nest_cancel(skb, nest);
+ return -1;
+ }
+
+ nla_nest_end(skb, nest);
+ } else {
+ nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP);
+ if (!nest)
+ return -1;
+
+ if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC,
+ info->key.u.ipv4.src) < 0 ||
+ nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST,
+ info->key.u.ipv4.dst) < 0) {
+ nla_nest_cancel(skb, nest);
+ return -1;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
+ return 0;
+}
+
+static int nft_tunnel_opts_dump(struct sk_buff *skb,
+ struct nft_tunnel_obj *priv)
+{
+ struct nft_tunnel_opts *opts = &priv->opts;
+ struct nlattr *nest, *inner;
+
+ nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS);
+ if (!nest)
+ return -1;
+
+ if (opts->flags & TUNNEL_VXLAN_OPT) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
+ if (!inner)
+ goto failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP,
+ htonl(opts->u.vxlan.gbp)))
+ goto inner_failure;
+ nla_nest_end(skb, inner);
+ } else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
+ if (!inner)
+ goto failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION,
+ htonl(opts->u.erspan.version)))
+ goto inner_failure;
+ switch (opts->u.erspan.version) {
+ case ERSPAN_VERSION:
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX,
+ opts->u.erspan.u.index))
+ goto inner_failure;
+ break;
+ case ERSPAN_VERSION2:
+ if (nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_HWID,
+ get_hwid(&opts->u.erspan.u.md2)) ||
+ nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_DIR,
+ opts->u.erspan.u.md2.dir))
+ goto inner_failure;
+ break;
+ }
+ nla_nest_end(skb, inner);
+ } else if (opts->flags & TUNNEL_GENEVE_OPT) {
+ struct geneve_opt *opt;
+ int offset = 0;
+
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
+ if (!inner)
+ goto failure;
+ while (opts->len > offset) {
+			opt = (struct geneve_opt *)(opts->u.data + offset);
+ if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
+ opt->opt_class) ||
+ nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE,
+ opt->type) ||
+ nla_put(skb, NFTA_TUNNEL_KEY_GENEVE_DATA,
+ opt->length * 4, opt->opt_data))
+ goto inner_failure;
+ offset += sizeof(*opt) + opt->length * 4;
+ }
+ nla_nest_end(skb, inner);
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+inner_failure:
+ nla_nest_cancel(skb, inner);
+failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int nft_tunnel_ports_dump(struct sk_buff *skb,
+ struct ip_tunnel_info *info)
+{
+ if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 ||
+ nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_tunnel_flags_dump(struct sk_buff *skb,
+ struct ip_tunnel_info *info)
+{
+ u32 flags = 0;
+
+ if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
+ if (!(info->key.tun_flags & TUNNEL_CSUM))
+ flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
+ if (info->key.tun_flags & TUNNEL_SEQ)
+ flags |= NFT_TUNNEL_F_SEQ_NUMBER;
+
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int nft_tunnel_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_tunnel_obj *priv = nft_obj_data(obj);
+ struct ip_tunnel_info *info = &priv->md->u.tun_info;
+
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ID,
+ tunnel_id_to_key32(info->key.tun_id)) ||
+ nft_tunnel_ip_dump(skb, info) < 0 ||
+ nft_tunnel_ports_dump(skb, info) < 0 ||
+ nft_tunnel_flags_dump(skb, info) < 0 ||
+ nla_put_u8(skb, NFTA_TUNNEL_KEY_TOS, info->key.tos) ||
+ nla_put_u8(skb, NFTA_TUNNEL_KEY_TTL, info->key.ttl) ||
+ nft_tunnel_opts_dump(skb, priv) < 0)
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_tunnel_obj *priv = nft_obj_data(obj);
+
+ metadata_dst_free(priv->md);
+}
+
+static struct nft_object_type nft_tunnel_obj_type;
+static const struct nft_object_ops nft_tunnel_obj_ops = {
+ .type = &nft_tunnel_obj_type,
+ .size = sizeof(struct nft_tunnel_obj),
+ .eval = nft_tunnel_obj_eval,
+ .init = nft_tunnel_obj_init,
+ .destroy = nft_tunnel_obj_destroy,
+ .dump = nft_tunnel_obj_dump,
+};
+
+static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
+ .type = NFT_OBJECT_TUNNEL,
+ .ops = &nft_tunnel_obj_ops,
+ .maxattr = NFTA_TUNNEL_KEY_MAX,
+ .policy = nft_tunnel_key_policy,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_tunnel_module_init(void)
+{
+ int err;
+
+ err = nft_register_expr(&nft_tunnel_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_obj(&nft_tunnel_obj_type);
+ if (err < 0)
+ nft_unregister_expr(&nft_tunnel_type);
+
+ return err;
+}
+
+static void __exit nft_tunnel_module_exit(void)
+{
+ nft_unregister_obj(&nft_tunnel_obj_type);
+ nft_unregister_expr(&nft_tunnel_type);
+}
+
+module_init(nft_tunnel_module_init);
+module_exit(nft_tunnel_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("tunnel");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
+MODULE_DESCRIPTION("nftables tunnel expression support");
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
new file mode 100644
index 000000000..30259846c
--- /dev/null
+++ b/net/netfilter/nft_xfrm.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <net/xfrm.h>
+
+static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
+ [NFTA_XFRM_KEY] = { .type = NLA_U32 },
+ [NFTA_XFRM_DIR] = { .type = NLA_U8 },
+ [NFTA_XFRM_SPNUM] = { .type = NLA_U32 },
+ [NFTA_XFRM_DREG] = { .type = NLA_U32 },
+};
+
+struct nft_xfrm {
+ enum nft_xfrm_keys key:8;
+ u8 dreg;
+ u8 dir;
+ u8 spnum;
+ u8 len;
+};
+
+static int nft_xfrm_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_xfrm *priv = nft_expr_priv(expr);
+ unsigned int len = 0;
+ u32 spnum = 0;
+ u8 dir;
+
+ if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG])
+ return -EINVAL;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_XFRM_KEY]));
+ switch (priv->key) {
+ case NFT_XFRM_KEY_REQID:
+ case NFT_XFRM_KEY_SPI:
+ len = sizeof(u32);
+ break;
+ case NFT_XFRM_KEY_DADDR_IP4:
+ case NFT_XFRM_KEY_SADDR_IP4:
+ len = sizeof(struct in_addr);
+ break;
+ case NFT_XFRM_KEY_DADDR_IP6:
+ case NFT_XFRM_KEY_SADDR_IP6:
+ len = sizeof(struct in6_addr);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dir = nla_get_u8(tb[NFTA_XFRM_DIR]);
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ case XFRM_POLICY_OUT:
+ priv->dir = dir;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_XFRM_SPNUM])
+ spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM]));
+
+ if (spnum >= XFRM_MAX_DEPTH)
+ return -ERANGE;
+
+ priv->spnum = spnum;
+
+ priv->len = len;
+ return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
+}
+
+/* Return true if key asks for daddr/saddr and current
+ * state does have a valid address (BEET, TUNNEL).
+ */
+static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
+{
+ switch (k) {
+ case NFT_XFRM_KEY_DADDR_IP4:
+ case NFT_XFRM_KEY_SADDR_IP4:
+ if (family == NFPROTO_IPV4)
+ break;
+ return false;
+ case NFT_XFRM_KEY_DADDR_IP6:
+ case NFT_XFRM_KEY_SADDR_IP6:
+ if (family == NFPROTO_IPV6)
+ break;
+ return false;
+ default:
+ return true;
+ }
+
+ return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
+}
+
+static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
+ struct nft_regs *regs,
+ const struct xfrm_state *state)
+{
+ u32 *dest = &regs->data[priv->dreg];
+
+ if (!xfrm_state_addr_ok(priv->key,
+ state->props.family,
+ state->props.mode)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ switch (priv->key) {
+ case NFT_XFRM_KEY_UNSPEC:
+ case __NFT_XFRM_KEY_MAX:
+ WARN_ON_ONCE(1);
+ break;
+ case NFT_XFRM_KEY_DADDR_IP4:
+ *dest = (__force __u32)state->id.daddr.a4;
+ return;
+ case NFT_XFRM_KEY_DADDR_IP6:
+ memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
+ return;
+ case NFT_XFRM_KEY_SADDR_IP4:
+ *dest = (__force __u32)state->props.saddr.a4;
+ return;
+ case NFT_XFRM_KEY_SADDR_IP6:
+ memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
+ return;
+ case NFT_XFRM_KEY_REQID:
+ *dest = state->props.reqid;
+ return;
+ case NFT_XFRM_KEY_SPI:
+ *dest = (__force __u32)state->id.spi;
+ return;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct sec_path *sp = skb_sec_path(pkt->skb);
+ const struct xfrm_state *state;
+
+ if (sp == NULL || sp->len <= priv->spnum) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ state = sp->xvec[priv->spnum];
+ nft_xfrm_state_get_key(priv, regs, state);
+}
+
+static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct dst_entry *dst = skb_dst(pkt->skb);
+ int i;
+
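+	/* walk the xfrm bundle: each xfrm_dst in the child chain carries one state; stop at entry #spnum */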
+ for (i = 0; dst && dst->xfrm;
+ dst = ((const struct xfrm_dst *)dst)->child, i++) {
+ if (i < priv->spnum)
+ continue;
+
+ nft_xfrm_state_get_key(priv, regs, dst->xfrm);
+ return;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static void nft_xfrm_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_xfrm *priv = nft_expr_priv(expr);
+
+ switch (priv->dir) {
+ case XFRM_POLICY_IN:
+ nft_xfrm_get_eval_in(priv, regs, pkt);
+ break;
+ case XFRM_POLICY_OUT:
+ nft_xfrm_get_eval_out(priv, regs, pkt);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ regs->verdict.code = NFT_BREAK;
+ break;
+ }
+}
+
+static int nft_xfrm_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_xfrm *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_XFRM_DREG, priv->dreg))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_XFRM_KEY, htonl(priv->key)))
+ return -1;
+ if (nla_put_u8(skb, NFTA_XFRM_DIR, priv->dir))
+ return -1;
+ if (nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(priv->spnum)))
+ return -1;
+
+ return 0;
+}
+
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_xfrm *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
+ switch (priv->dir) {
+ case XFRM_POLICY_IN:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING);
+ break;
+ case XFRM_POLICY_OUT:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static bool nft_xfrm_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_xfrm *priv = nft_expr_priv(expr);
+ const struct nft_xfrm *xfrm;
+
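+	/* if a previous expression already loaded the same key into this register, this one can be elided */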
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ xfrm = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != xfrm->key ||
+ priv->dreg != xfrm->dreg ||
+ priv->dir != xfrm->dir ||
+ priv->spnum != xfrm->spnum) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static struct nft_expr_type nft_xfrm_type;
+static const struct nft_expr_ops nft_xfrm_get_ops = {
+ .type = &nft_xfrm_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_xfrm)),
+ .eval = nft_xfrm_get_eval,
+ .init = nft_xfrm_get_init,
+ .dump = nft_xfrm_get_dump,
+ .validate = nft_xfrm_validate,
+ .reduce = nft_xfrm_reduce,
+};
+
+static struct nft_expr_type nft_xfrm_type __read_mostly = {
+ .name = "xfrm",
+ .ops = &nft_xfrm_get_ops,
+ .policy = nft_xfrm_policy,
+ .maxattr = NFTA_XFRM_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_xfrm_module_init(void)
+{
+ return nft_register_expr(&nft_xfrm_type);
+}
+
+static void __exit nft_xfrm_module_exit(void)
+{
+ nft_unregister_expr(&nft_xfrm_type);
+}
+
+module_init(nft_xfrm_module_init);
+module_exit(nft_xfrm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("xfrm");
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
new file mode 100644
index 000000000..2182d361e
--- /dev/null
+++ b/net/netfilter/utils.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
+
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u8 protocol)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+
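+	/* CHECKSUM_COMPLETE: verify the hardware-provided sum against the
+	 * pseudo-header; anything else falls back to a full software checksum.
+	 */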
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+ break;
+ if ((protocol != IPPROTO_TCP && protocol != IPPROTO_UDP &&
+ !csum_fold(skb->csum)) ||
+ !csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - dataoff, protocol,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+ fallthrough;
+ case CHECKSUM_NONE:
+ if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP)
+ skb->csum = 0;
+ else
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb->len - dataoff,
+ protocol, 0);
+ csum = __skb_checksum_complete(skb);
+ }
+ return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u8 protocol)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip_checksum(skb, hook, dataoff, protocol);
+ fallthrough;
+ case CHECKSUM_NONE:
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+ skb->len - dataoff, 0);
+ skb->ip_summed = CHECKSUM_NONE;
+ return __skb_checksum_complete_head(skb, dataoff + len);
+ }
+ return csum;
+}
+
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u8 protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+ break;
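+		/* skb->csum covers the packet from the network header on; subtract
+		 * the first dataoff bytes so only the l4 part is verified.
+		 */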
+ if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff, protocol,
+ csum_sub(skb->csum,
+ skb_checksum(skb, 0,
+ dataoff, 0)))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+ fallthrough;
+ case CHECKSUM_NONE:
+ skb->csum = ~csum_unfold(
+ csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0,
+ skb_checksum(skb, 0,
+ dataoff, 0))));
+ csum = __skb_checksum_complete(skb);
+ }
+ return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u8 protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum hsum;
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip6_checksum(skb, hook, dataoff, protocol);
+ fallthrough;
+ case CHECKSUM_NONE:
+ hsum = skb_checksum(skb, 0, dataoff, 0);
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0, hsum)));
+ skb->ip_summed = CHECKSUM_NONE;
+ return __skb_checksum_complete_head(skb, dataoff + len);
+ }
+ return csum;
+};
+
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u8 protocol,
+ unsigned short family)
+{
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
+ break;
+ case AF_INET6:
+ csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum);
+
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u8 protocol, unsigned short family)
+{
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ case AF_INET6:
+ csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum_partial);
+
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops __maybe_unused;
+ int ret = 0;
+
+ switch (family) {
+ case AF_INET:
+ ret = nf_ip_route(net, dst, fl, strict);
+ break;
+ case AF_INET6:
+ ret = nf_ip6_route(net, dst, fl, strict);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_route);
+
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+ const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->tos == rt_info->tos &&
+ skb->mark == rt_info->mark &&
+ iph->daddr == rt_info->daddr &&
+ iph->saddr == rt_info->saddr))
+ return ip_route_me_harder(entry->state.net, entry->state.sk,
+ skb, RTN_UNSPEC);
+ }
+#endif
+ return 0;
+}
+
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
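+	/* ipv6 is reached via nf_ipv6_ops so the core does not hard-depend on the ipv6 module */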
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
new file mode 100644
index 000000000..470282cf3
--- /dev/null
+++ b/net/netfilter/x_tables.c
@@ -0,0 +1,2017 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * x_tables core - Backend for {ip,ip6,arp}_tables
+ *
+ * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
+ * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on existing ip_tables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/audit.h>
+#include <linux/user_namespace.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
+
+#define XT_PCPU_BLOCK_SIZE 4096
+#define XT_MAX_TABLE_SIZE (512 * 1024 * 1024)
+
+struct xt_template {
+ struct list_head list;
+
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+
+ struct module *me;
+
+ /* A unique name... */
+ char name[XT_TABLE_MAXNAMELEN];
+};
+
+static struct list_head xt_templates[NFPROTO_NUMPROTO];
+
+struct xt_pernet {
+ struct list_head tables[NFPROTO_NUMPROTO];
+};
+
+struct compat_delta {
+ unsigned int offset; /* offset in kernel */
+ int delta; /* delta in 32bit user land */
+};
+
+struct xt_af {
+ struct mutex mutex;
+ struct list_head match;
+ struct list_head target;
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+ struct mutex compat_mutex;
+ struct compat_delta *compat_tab;
+ unsigned int number; /* number of slots in compat_tab[] */
+ unsigned int cur; /* number of used slots in compat_tab[] */
+#endif
+};
+
+static unsigned int xt_pernet_id __read_mostly;
+static struct xt_af *xt __read_mostly;
+
+static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
+ [NFPROTO_UNSPEC] = "x",
+ [NFPROTO_IPV4] = "ip",
+ [NFPROTO_ARP] = "arp",
+ [NFPROTO_BRIDGE] = "eb",
+ [NFPROTO_IPV6] = "ip6",
+};
+
+/* Registration hooks for targets. */
+int xt_register_target(struct xt_target *target)
+{
+ u_int8_t af = target->family;
+
+ mutex_lock(&xt[af].mutex);
+ list_add(&target->list, &xt[af].target);
+ mutex_unlock(&xt[af].mutex);
+ return 0;
+}
+EXPORT_SYMBOL(xt_register_target);
+
+void
+xt_unregister_target(struct xt_target *target)
+{
+ u_int8_t af = target->family;
+
+ mutex_lock(&xt[af].mutex);
+ list_del(&target->list);
+ mutex_unlock(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_target);
+
+int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = xt_register_target(&target[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ xt_unregister_targets(target, i);
+ return err;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+ while (n-- > 0)
+ xt_unregister_target(&target[n]);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
+int xt_register_match(struct xt_match *match)
+{
+ u_int8_t af = match->family;
+
+ mutex_lock(&xt[af].mutex);
+ list_add(&match->list, &xt[af].match);
+ mutex_unlock(&xt[af].mutex);
+ return 0;
+}
+EXPORT_SYMBOL(xt_register_match);
+
+void
+xt_unregister_match(struct xt_match *match)
+{
+ u_int8_t af = match->family;
+
+ mutex_lock(&xt[af].mutex);
+ list_del(&match->list);
+ mutex_unlock(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_match);
+
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = xt_register_match(&match[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ xt_unregister_matches(match, i);
+ return err;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+ while (n-- > 0)
+ xt_unregister_match(&match[n]);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
+
+/*
+ * These are weird, but module loading must not be done with the mutex
+ * held (since the loaded modules will register themselves), and we
+ * have to have a single function to use.
+ */
+
+/* Find match, grabs ref. Returns ERR_PTR() on error. */
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
+{
+ struct xt_match *m;
+ int err = -ENOENT;
+
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(m, &xt[af].match, list) {
+ if (strcmp(m->name, name) == 0) {
+ if (m->revision == revision) {
+ if (try_module_get(m->me)) {
+ mutex_unlock(&xt[af].mutex);
+ return m;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ if (af != NFPROTO_UNSPEC)
+ /* Try searching again in the family-independent list */
+ return xt_find_match(NFPROTO_UNSPEC, name, revision);
+
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(xt_find_match);
+
+struct xt_match *
+xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
+{
+ struct xt_match *match;
+
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
+ match = xt_find_match(nfproto, name, revision);
+ if (IS_ERR(match)) {
+ request_module("%st_%s", xt_prefix[nfproto], name);
+ match = xt_find_match(nfproto, name, revision);
+ }
+
+ return match;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_match);
+
+/* Find target, grabs ref. Returns ERR_PTR() on error. */
+static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
+{
+ struct xt_target *t;
+ int err = -ENOENT;
+
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt[af].target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision == revision) {
+ if (try_module_get(t->me)) {
+ mutex_unlock(&xt[af].mutex);
+ return t;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ if (af != NFPROTO_UNSPEC)
+ /* Try searching again in the family-independent list */
+ return xt_find_target(NFPROTO_UNSPEC, name, revision);
+
+ return ERR_PTR(err);
+}
+
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
+{
+ struct xt_target *target;
+
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
+ target = xt_find_target(af, name, revision);
+ if (IS_ERR(target)) {
+ request_module("%st_%s", xt_prefix[af], name);
+ target = xt_find_target(af, name, revision);
+ }
+
+ return target;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_target);
+
+
+static int xt_obj_to_user(u16 __user *psize, u16 size,
+ void __user *pname, const char *name,
+ u8 __user *prev, u8 rev)
+{
+ if (put_user(size, psize))
+ return -EFAULT;
+ if (copy_to_user(pname, name, strlen(name) + 1))
+ return -EFAULT;
+ if (put_user(rev, prev))
+ return -EFAULT;
+
+ return 0;
+}
+
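+/* copy a match/target header (size, name, revision) to userspace;
+ * a C_SIZE of zero means "use the kernel object's own size".
+ */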
+#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE) \
+ xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size, \
+ U->u.user.name, K->u.kernel.TYPE->name, \
+ &U->u.user.revision, K->u.kernel.TYPE->revision)
+
+int xt_data_to_user(void __user *dst, const void *src,
+ int usersize, int size, int aligned_size)
+{
+ usersize = usersize ? : size;
+ if (copy_to_user(dst, src, usersize))
+ return -EFAULT;
+ if (usersize != aligned_size &&
+ clear_user(dst + usersize, aligned_size - usersize))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_data_to_user);
+
+#define XT_DATA_TO_USER(U, K, TYPE) \
+ xt_data_to_user(U->data, K->data, \
+ K->u.kernel.TYPE->usersize, \
+ K->u.kernel.TYPE->TYPE##size, \
+ XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
+
+int xt_match_to_user(const struct xt_entry_match *m,
+ struct xt_entry_match __user *u)
+{
+ return XT_OBJ_TO_USER(u, m, match, 0) ||
+ XT_DATA_TO_USER(u, m, match);
+}
+EXPORT_SYMBOL_GPL(xt_match_to_user);
+
+int xt_target_to_user(const struct xt_entry_target *t,
+ struct xt_entry_target __user *u)
+{
+ return XT_OBJ_TO_USER(u, t, target, 0) ||
+ XT_DATA_TO_USER(u, t, target);
+}
+EXPORT_SYMBOL_GPL(xt_target_to_user);
+
+static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
+{
+ const struct xt_match *m;
+ int have_rev = 0;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(m, &xt[af].match, list) {
+ if (strcmp(m->name, name) == 0) {
+ if (m->revision > *bestp)
+ *bestp = m->revision;
+ if (m->revision == revision)
+ have_rev = 1;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ if (af != NFPROTO_UNSPEC && !have_rev)
+ return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
+
+ return have_rev;
+}
+
+static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
+{
+ const struct xt_target *t;
+ int have_rev = 0;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt[af].target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision > *bestp)
+ *bestp = t->revision;
+ if (t->revision == revision)
+ have_rev = 1;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ if (af != NFPROTO_UNSPEC && !have_rev)
+ return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
+
+ return have_rev;
+}
+
+/* Returns true or false (if no such extension at all) */
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
+ int *err)
+{
+ int have_rev, best = -1;
+
+ if (target == 1)
+ have_rev = target_revfn(af, name, revision, &best);
+ else
+ have_rev = match_revfn(af, name, revision, &best);
+
+ /* Nothing at all? Return 0 to try loading module. */
+ if (best == -1) {
+ *err = -ENOENT;
+ return 0;
+ }
+
+ *err = best;
+ if (!have_rev)
+ *err = -EPROTONOSUPPORT;
+ return 1;
+}
+EXPORT_SYMBOL_GPL(xt_find_revision);
+
+static char *
+textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
+{
+ static const char *const inetbr_names[] = {
+ "PREROUTING", "INPUT", "FORWARD",
+ "OUTPUT", "POSTROUTING", "BROUTING",
+ };
+ static const char *const arp_names[] = {
+ "INPUT", "FORWARD", "OUTPUT",
+ };
+ const char *const *names;
+ unsigned int i, max;
+ char *p = buf;
+ bool np = false;
+ int res;
+
+ names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
+ max = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
+ ARRAY_SIZE(inetbr_names);
+ *p = '\0';
+ for (i = 0; i < max; ++i) {
+ if (!(mask & (1 << i)))
+ continue;
+ res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
+ if (res > 0) {
+ size -= res;
+ p += res;
+ }
+ np = true;
+ }
+
+ return buf;
+}
+
+/**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * Some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose: it checks that the name is NUL terminated and isn't a 'special'
+ * name, like "..".
+ *
+ * returns a negative number on error or 0 if the name is usable.
+ */
+int xt_check_proc_name(const char *name, unsigned int size)
+{
+ if (name[0] == '\0')
+ return -EINVAL;
+
+ if (strnlen(name, size) == size)
+ return -ENAMETOOLONG;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0 ||
+ strchr(name, '/'))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(xt_check_proc_name);
+
+int xt_check_match(struct xt_mtchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
+{
+ int ret;
+
+ if (XT_ALIGN(par->match->matchsize) != size &&
+ par->match->matchsize != -1) {
+ /*
+ * ebt_among is exempt from centralized matchsize checking
+ * because it uses a dynamic-size data set.
+ */
+ pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->revision,
+ XT_ALIGN(par->match->matchsize), size);
+ return -EINVAL;
+ }
+ if (par->match->table != NULL &&
+ strcmp(par->match->table, par->table) != 0) {
+ pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->table, par->table);
+ return -EINVAL;
+ }
+ if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
+ char used[64], allow[64];
+
+ pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
+ xt_prefix[par->family], par->match->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->match->hooks,
+ par->family));
+ return -EINVAL;
+ }
+ if (par->match->proto && (par->match->proto != proto || inv_proto)) {
+ pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->proto);
+ return -EINVAL;
+ }
+ if (par->match->checkentry != NULL) {
+ ret = par->match->checkentry(par);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ /* Flag up potential errors. */
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_check_match);
+
+/** xt_check_entry_match - check that matches end before start of target
+ *
+ * @match: beginning of xt_entry_match
+ * @target: beginning of this rule's target (alleged end of matches)
+ * @alignment: alignment requirement of match structures
+ *
+ * Validates that all matches add up to the beginning of the target,
+ * and that each match covers at least the base structure size.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int xt_check_entry_match(const char *match, const char *target,
+ const size_t alignment)
+{
+ const struct xt_entry_match *pos;
+ int length = target - match;
+
+ if (length == 0) /* no matches */
+ return 0;
+
+ pos = (struct xt_entry_match *)match;
+ do {
+ if ((unsigned long)pos % alignment)
+ return -EINVAL;
+
+ if (length < (int)sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size < sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size > length)
+ return -EINVAL;
+
+ length -= pos->u.match_size;
+ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
+ } while (length > 0);
+
+ return 0;
+}
+
+/** xt_check_table_hooks - check hook entry points are sane
+ *
+ * @info: xt_table_info to check
+ * @valid_hooks: hook entry points that we can enter from
+ *
+ * Validates that the hook entry and underflow points are set up.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
+{
+ const char *err = "unsorted underflow";
+ unsigned int i, max_uflow, max_entry;
+ bool check_hooks = false;
+
+ BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));
+
+ max_entry = 0;
+ max_uflow = 0;
+
+ for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
+ if (!(valid_hooks & (1 << i)))
+ continue;
+
+ if (info->hook_entry[i] == 0xFFFFFFFF)
+ return -EINVAL;
+ if (info->underflow[i] == 0xFFFFFFFF)
+ return -EINVAL;
+
+ if (check_hooks) {
+ if (max_uflow > info->underflow[i])
+ goto error;
+
+ if (max_uflow == info->underflow[i]) {
+ err = "duplicate underflow";
+ goto error;
+ }
+ if (max_entry > info->hook_entry[i]) {
+ err = "unsorted entry";
+ goto error;
+ }
+ if (max_entry == info->hook_entry[i]) {
+ err = "duplicate entry";
+ goto error;
+ }
+ }
+ max_entry = info->hook_entry[i];
+ max_uflow = info->underflow[i];
+ check_hooks = true;
+ }
+
+ return 0;
+error:
+ pr_err_ratelimited("%s at hook %d\n", err, i);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(xt_check_table_hooks);
+
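+/* a standard-target verdict is a jump offset when positive; negative
+ * values encode XT_RETURN or a builtin verdict as -(NF_*) - 1.
+ */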
+static bool verdict_ok(int verdict)
+{
+ if (verdict > 0)
+ return true;
+
+ if (verdict < 0) {
+ int v = -verdict - 1;
+
+ if (verdict == XT_RETURN)
+ return true;
+
+ switch (v) {
+ case NF_ACCEPT: return true;
+ case NF_DROP: return true;
+ case NF_QUEUE: return true;
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ return false;
+}
+
+static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
+ const char *msg, unsigned int msglen)
+{
+ return usersize == kernsize && strnlen(msg, msglen) < msglen;
+}
+
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
+{
+ struct xt_af *xp = &xt[af];
+
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
+ if (WARN_ON(!xp->compat_tab))
+ return -ENOMEM;
+
+ if (xp->cur >= xp->number)
+ return -EINVAL;
+
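+	/* store the running total so one lookup yields the full compat -> native offset shift */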
+ if (xp->cur)
+ delta += xp->compat_tab[xp->cur - 1].delta;
+ xp->compat_tab[xp->cur].offset = offset;
+ xp->compat_tab[xp->cur].delta = delta;
+ xp->cur++;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_add_offset);
+
+void xt_compat_flush_offsets(u_int8_t af)
+{
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
+ if (xt[af].compat_tab) {
+ vfree(xt[af].compat_tab);
+ xt[af].compat_tab = NULL;
+ xt[af].number = 0;
+ xt[af].cur = 0;
+ }
+}
+EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
+
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+{
+ struct compat_delta *tmp = xt[af].compat_tab;
+ int mid, left = 0, right = xt[af].cur - 1;
+
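+	/* binary search: find the cumulative delta recorded before @offset */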
+ while (left <= right) {
+ mid = (left + right) >> 1;
+ if (offset > tmp[mid].offset)
+ left = mid + 1;
+ else if (offset < tmp[mid].offset)
+ right = mid - 1;
+ else
+ return mid ? tmp[mid - 1].delta : 0;
+ }
+ return left ? tmp[left - 1].delta : 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
+
+int xt_compat_init_offsets(u8 af, unsigned int number)
+{
+ size_t mem;
+
+ WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
+ if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
+ return -EINVAL;
+
+ if (WARN_ON(xt[af].compat_tab))
+ return -EINVAL;
+
+ mem = sizeof(struct compat_delta) * number;
+ if (mem > XT_MAX_TABLE_SIZE)
+ return -ENOMEM;
+
+ xt[af].compat_tab = vmalloc(mem);
+ if (!xt[af].compat_tab)
+ return -ENOMEM;
+
+ xt[af].number = number;
+ xt[af].cur = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
+int xt_compat_match_offset(const struct xt_match *match)
+{
+ u_int16_t csize = match->compatsize ? : match->matchsize;
+ return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
+
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
+{
+ const struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+ int off = xt_compat_match_offset(match);
+ u_int16_t msize = cm->u.user.match_size;
+ char name[sizeof(m->u.user.name)];
+
+ m = *dstptr;
+ memcpy(m, cm, sizeof(*cm));
+ if (match->compat_from_user)
+ match->compat_from_user(m->data, cm->data);
+ else
+ memcpy(m->data, cm->data, msize - sizeof(*cm));
+
+ msize += off;
+ m->u.user.match_size = msize;
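+	/* keep a stack copy of the name: module_put() below may free the module and match->name with it */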
+ strscpy(name, match->name, sizeof(name));
+ module_put(match->me);
+ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+
+ *size += off;
+ *dstptr += msize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \
+ xt_data_to_user(U->data, K->data, \
+ K->u.kernel.TYPE->usersize, \
+ C_SIZE, \
+ COMPAT_XT_ALIGN(C_SIZE))
+
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+ void __user **dstptr, unsigned int *size)
+{
+ const struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match __user *cm = *dstptr;
+ int off = xt_compat_match_offset(match);
+ u_int16_t msize = m->u.user.match_size - off;
+
+ if (XT_OBJ_TO_USER(cm, m, match, msize))
+ return -EFAULT;
+
+ if (match->compat_to_user) {
+ if (match->compat_to_user((void __user *)cm->data, m->data))
+ return -EFAULT;
+ } else {
+ if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
+ return -EFAULT;
+ }
+
+ *size -= off;
+ *dstptr += msize;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+
+/* non-compat version may have padding after verdict */
+struct compat_xt_standard_target {
+ struct compat_xt_entry_target t;
+ compat_uint_t verdict;
+};
+
+struct compat_xt_error_target {
+ struct compat_xt_entry_target t;
+ char errorname[XT_FUNCTION_MAXNAMELEN];
+};
+
+int xt_compat_check_entry_offsets(const void *base, const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct compat_xt_entry_target *t;
+ const char *e = base;
+
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+ const struct compat_xt_standard_target *st = (const void *)t;
+
+ if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+ return -EINVAL;
+
+ if (!verdict_ok(st->verdict))
+ return -EINVAL;
+ } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+ const struct compat_xt_error_target *et = (const void *)t;
+
+ if (!error_tg_ok(t->u.target_size, sizeof(*et),
+ et->errorname, sizeof(et->errorname)))
+ return -EINVAL;
+ }
+
+	/* compat_xt_entry_match has less strict alignment requirements,
+	 * otherwise the two structures are identical. In case of padding
+	 * differences we would need a compat version of xt_check_entry_match.
+ */
+ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct compat_xt_entry_match));
+}
+EXPORT_SYMBOL(xt_compat_check_entry_offsets);
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
+
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets() for the
+ * CONFIG_NETFILTER_XTABLES_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed the following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ * length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
+ * e->elems[]-----' | |
+ * matchsize | |
+ * matchsize | |
+ * | |
+ * target_offset---------------------------------' |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ * This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_entry_offsets(const void *base,
+ const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct xt_entry_target *t;
+ const char *e = base;
+
+ /* target start is within the ip/ip6/arpt_entry struct */
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+ const struct xt_standard_target *st = (const void *)t;
+
+ if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+ return -EINVAL;
+
+ if (!verdict_ok(st->verdict))
+ return -EINVAL;
+ } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+ const struct xt_error_target *et = (const void *)t;
+
+ if (!error_tg_ok(t->u.target_size, sizeof(*et),
+ et->errorname, sizeof(et->errorname)))
+ return -EINVAL;
+ }
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct xt_entry_match));
+}
+EXPORT_SYMBOL(xt_check_entry_offsets);
+
+/**
+ * xt_alloc_entry_offsets - allocate array to store rule head offsets
+ *
+ * @size: number of entries
+ *
+ * Return: NULL or zeroed kmalloc'd or vmalloc'd array
+ */
+unsigned int *xt_alloc_entry_offsets(unsigned int size)
+{
+ if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int))
+ return NULL;
+
+ return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL);
+}
+EXPORT_SYMBOL(xt_alloc_entry_offsets);
+
+/**
+ * xt_find_jump_offset - check if target is a valid jump offset
+ *
+ * @offsets: array containing all valid rule start offsets of a rule blob
+ * @target: the jump target to search for
+ * @size: entries in @offset
+ */
+bool xt_find_jump_offset(const unsigned int *offsets,
+ unsigned int target, unsigned int size)
+{
+ int m, low = 0, hi = size;
+
+ while (hi > low) {
+ m = (low + hi) / 2u;
+
+ if (offsets[m] > target)
+ hi = m;
+ else if (offsets[m] < target)
+ low = m + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(xt_find_jump_offset);
+
+int xt_check_target(struct xt_tgchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
+{
+ int ret;
+
+ if (XT_ALIGN(par->target->targetsize) != size) {
+ pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->revision,
+ XT_ALIGN(par->target->targetsize), size);
+ return -EINVAL;
+ }
+ if (par->target->table != NULL &&
+ strcmp(par->target->table, par->table) != 0) {
+ pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->table, par->table);
+ return -EINVAL;
+ }
+ if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
+ char used[64], allow[64];
+
+ pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
+ xt_prefix[par->family], par->target->name,
+ textify_hooks(used, sizeof(used),
+ par->hook_mask, par->family),
+ textify_hooks(allow, sizeof(allow),
+ par->target->hooks,
+ par->family));
+ return -EINVAL;
+ }
+ if (par->target->proto && (par->target->proto != proto || inv_proto)) {
+ pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->proto);
+ return -EINVAL;
+ }
+ if (par->target->checkentry != NULL) {
+ ret = par->target->checkentry(par);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ /* Flag up potential errors. */
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_check_target);
+
+/**
+ * xt_copy_counters - copy counters and metadata from a sockptr_t
+ *
+ * @arg: src sockptr
+ * @len: alleged size of userspace memory
+ * @info: where to store the xt_counters_info metadata
+ *
+ * Copies the counter metadata from @arg and stores it in @info.
+ *
+ * vmallocs memory to hold the counters, then copies the counter data
+ * from @arg to the new memory and returns a pointer to it.
+ *
+ * If called from a compat syscall, @info gets converted automatically to the
+ * 64bit representation.
+ *
+ * The metadata associated with the counters is stored in @info.
+ *
+ * Return: pointer that the caller has to test via IS_ERR().
+ * If IS_ERR() is false, the caller has to vfree() the pointer.
+ */
+void *xt_copy_counters(sockptr_t arg, unsigned int len,
+ struct xt_counters_info *info)
+{
+ size_t offset;
+ void *mem;
+ u64 size;
+
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+ if (in_compat_syscall()) {
+ /* structures only differ in size due to alignment */
+ struct compat_xt_counters_info compat_tmp;
+
+ if (len <= sizeof(compat_tmp))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(compat_tmp);
+ if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
+ info->num_counters = compat_tmp.num_counters;
+ offset = sizeof(compat_tmp);
+ } else
+#endif
+ {
+ if (len <= sizeof(*info))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(*info);
+ if (copy_from_sockptr(info, arg, sizeof(*info)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ offset = sizeof(*info);
+ }
+ info->name[sizeof(info->name) - 1] = '\0';
+
+ size = sizeof(struct xt_counters);
+ size *= info->num_counters;
+
+ if (size != (u64)len)
+ return ERR_PTR(-EINVAL);
+
+ mem = vmalloc(len);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_sockptr_offset(mem, arg, offset, len) == 0)
+ return mem;
+
+ vfree(mem);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(xt_copy_counters);
+
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+int xt_compat_target_offset(const struct xt_target *target)
+{
+ u_int16_t csize = target->compatsize ? : target->targetsize;
+ return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
+
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+ unsigned int *size)
+{
+ const struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+ int off = xt_compat_target_offset(target);
+ u_int16_t tsize = ct->u.user.target_size;
+ char name[sizeof(t->u.user.name)];
+
+ t = *dstptr;
+ memcpy(t, ct, sizeof(*ct));
+ if (target->compat_from_user)
+ target->compat_from_user(t->data, ct->data);
+ else
+ memcpy(t->data, ct->data, tsize - sizeof(*ct));
+
+ tsize += off;
+ t->u.user.target_size = tsize;
+ strscpy(name, target->name, sizeof(name));
+ module_put(target->me);
+ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+
+ *size += off;
+ *dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+ void __user **dstptr, unsigned int *size)
+{
+ const struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target __user *ct = *dstptr;
+ int off = xt_compat_target_offset(target);
+ u_int16_t tsize = t->u.user.target_size - off;
+
+ if (XT_OBJ_TO_USER(ct, t, target, tsize))
+ return -EFAULT;
+
+ if (target->compat_to_user) {
+ if (target->compat_to_user((void __user *)ct->data, t->data))
+ return -EFAULT;
+ } else {
+ if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
+ return -EFAULT;
+ }
+
+ *size -= off;
+ *dstptr += tsize;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
+#endif
+
+struct xt_table_info *xt_alloc_table_info(unsigned int size)
+{
+ struct xt_table_info *info = NULL;
+ size_t sz = sizeof(*info) + size;
+
+ if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
+ return NULL;
+
+ info = kvmalloc(sz, GFP_KERNEL_ACCOUNT);
+ if (!info)
+ return NULL;
+
+ memset(info, 0, sizeof(*info));
+ info->size = size;
+ return info;
+}
+EXPORT_SYMBOL(xt_alloc_table_info);
+
+void xt_free_table_info(struct xt_table_info *info)
+{
+ int cpu;
+
+ if (info->jumpstack != NULL) {
+ for_each_possible_cpu(cpu)
+ kvfree(info->jumpstack[cpu]);
+ kvfree(info->jumpstack);
+ }
+
+ kvfree(info);
+}
+EXPORT_SYMBOL(xt_free_table_info);
+
+struct xt_table *xt_find_table(struct net *net, u8 af, const char *name)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_net->tables[af], list) {
+ if (strcmp(t->name, name) == 0) {
+ mutex_unlock(&xt[af].mutex);
+ return t;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+ return NULL;
+}
+EXPORT_SYMBOL(xt_find_table);
+
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+ const char *name)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct module *owner = NULL;
+ struct xt_template *tmpl;
+ struct xt_table *t;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_net->tables[af], list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+
+ /* Table doesn't exist in this netns, check larval list */
+ list_for_each_entry(tmpl, &xt_templates[af], list) {
+ int err;
+
+ if (strcmp(tmpl->name, name))
+ continue;
+ if (!try_module_get(tmpl->me))
+ goto out;
+
+ owner = tmpl->me;
+
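+		/* drop the mutex while instantiating: table_init() ends up in xt_register_table(), which takes it again */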
+ mutex_unlock(&xt[af].mutex);
+ err = tmpl->table_init(net);
+ if (err < 0) {
+ module_put(owner);
+ return ERR_PTR(err);
+ }
+
+ mutex_lock(&xt[af].mutex);
+ break;
+ }
+
+ /* and once again: */
+ list_for_each_entry(t, &xt_net->tables[af], list)
+ if (strcmp(t->name, name) == 0)
+ return t;
+
+ module_put(owner);
+ out:
+ mutex_unlock(&xt[af].mutex);
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(xt_find_table_lock);
+
+struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
+ const char *name)
+{
+ struct xt_table *t = xt_find_table_lock(net, af, name);
+
+#ifdef CONFIG_MODULES
+ if (IS_ERR(t)) {
+ int err = request_module("%stable_%s", xt_prefix[af], name);
+ if (err < 0)
+ return ERR_PTR(err);
+ t = xt_find_table_lock(net, af, name);
+ }
+#endif
+
+ return t;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_table_lock);
+
+void xt_table_unlock(struct xt_table *table)
+{
+ mutex_unlock(&xt[table->af].mutex);
+}
+EXPORT_SYMBOL_GPL(xt_table_unlock);
+
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+void xt_compat_lock(u_int8_t af)
+{
+ mutex_lock(&xt[af].compat_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_compat_lock);
+
+void xt_compat_unlock(u_int8_t af)
+{
+ mutex_unlock(&xt[af].compat_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_compat_unlock);
+#endif
+
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+
+struct static_key xt_tee_enabled __read_mostly;
+EXPORT_SYMBOL_GPL(xt_tee_enabled);
+
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+ unsigned int size;
+ int cpu;
+
+ size = sizeof(void **) * nr_cpu_ids;
+ if (size > PAGE_SIZE)
+ i->jumpstack = kvzalloc(size, GFP_KERNEL);
+ else
+ i->jumpstack = kzalloc(size, GFP_KERNEL);
+ if (i->jumpstack == NULL)
+ return -ENOMEM;
+
+ /* ruleset without jumps -- no stack needed */
+ if (i->stacksize == 0)
+ return 0;
+
+ /* Jumpstack needs to be able to record two full callchains, one
+ * from the first rule set traversal, plus one table reentrancy
+ * via -j TEE without clobbering the callchain that brought us to
+ * TEE target.
+ *
+ * This is done by allocating two jumpstacks per cpu, on reentry
+ * the upper half of the stack is used.
+ *
+ * see the jumpstack setup in ipt_do_table() for more details.
+ */
+ size = sizeof(void *) * i->stacksize * 2u;
+ for_each_possible_cpu(cpu) {
+ i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (i->jumpstack[cpu] == NULL)
+ /*
+ * Freeing will be done later on by the callers. The
+ * chain is: xt_replace_table -> __do_replace ->
+ * do_replace -> xt_free_table_info.
+ */
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+struct xt_counters *xt_counters_alloc(unsigned int counters)
+{
+ struct xt_counters *mem;
+
+ if (counters == 0 || counters > INT_MAX / sizeof(*mem))
+ return NULL;
+
+ counters *= sizeof(*mem);
+ if (counters > XT_MAX_TABLE_SIZE)
+ return NULL;
+
+ return vzalloc(counters);
+}
+EXPORT_SYMBOL(xt_counters_alloc);
+
+struct xt_table_info *
+xt_replace_table(struct xt_table *table,
+ unsigned int num_counters,
+ struct xt_table_info *newinfo,
+ int *error)
+{
+ struct xt_table_info *private;
+ unsigned int cpu;
+ int ret;
+
+ ret = xt_jumpstack_alloc(newinfo);
+ if (ret < 0) {
+ *error = ret;
+ return NULL;
+ }
+
+ /* Do the substitution. */
+ local_bh_disable();
+ private = table->private;
+
+ /* Check inside lock: is the old number correct? */
+ if (num_counters != private->number) {
+ pr_debug("num_counters != table->private->number (%u/%u)\n",
+ num_counters, private->number);
+ local_bh_enable();
+ *error = -EAGAIN;
+ return NULL;
+ }
+
+ newinfo->initial_entries = private->initial_entries;
+ /*
+ * Ensure contents of newinfo are visible before assigning to
+ * private.
+ */
+ smp_wmb();
+ table->private = newinfo;
+
+ /* make sure all cpus see new ->private value */
+ smp_mb();
+
+ /*
+	 * Even though table entries have now been swapped, other CPUs
+ * may still be using the old entries...
+ */
+ local_bh_enable();
+
+	/* ... so wait until xt_recseq is even on all CPUs */
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+ u32 seq = raw_read_seqcount(s);
+
+ if (seq & 1) {
+ do {
+ cond_resched();
+ cpu_relax();
+ } while (seq == raw_read_seqcount(s));
+ }
+ }
+
+ audit_log_nfcfg(table->name, table->af, private->number,
+ !private->number ? AUDIT_XT_OP_REGISTER :
+ AUDIT_XT_OP_REPLACE,
+ GFP_KERNEL);
+ return private;
+}
+EXPORT_SYMBOL_GPL(xt_replace_table);
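+
+/* __do_replace() in the ip(6)tables code drives the swap roughly like
+ * this (sketch, counter copy-out and error paths trimmed):
+ *
+ *	t = xt_request_find_table_lock(net, AF_INET, name);
+ *	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+ *	if (oldinfo) {
+ *		... copy old counters to userspace ...
+ *		xt_free_table_info(oldinfo);
+ *	}
+ *	xt_table_unlock(t);
+ *	module_put(t->me);
+ */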
+
+struct xt_table *xt_register_table(struct net *net,
+ const struct xt_table *input_table,
+ struct xt_table_info *bootstrap,
+ struct xt_table_info *newinfo)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table_info *private;
+ struct xt_table *t, *table;
+ int ret;
+
+ /* Don't add one object to multiple lists. */
+ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
+ if (!table) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&xt[table->af].mutex);
+ /* Don't autoload: we'd eat our tail... */
+ list_for_each_entry(t, &xt_net->tables[table->af], list) {
+ if (strcmp(t->name, table->name) == 0) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ /* Simplifies replace_table code. */
+ table->private = bootstrap;
+
+ if (!xt_replace_table(table, 0, newinfo, &ret))
+ goto unlock;
+
+ private = table->private;
+ pr_debug("table->private->number = %u\n", private->number);
+
+ /* save number of initial entries */
+ private->initial_entries = private->number;
+
+ list_add(&table->list, &xt_net->tables[table->af]);
+ mutex_unlock(&xt[table->af].mutex);
+ return table;
+
+unlock:
+ mutex_unlock(&xt[table->af].mutex);
+ kfree(table);
+out:
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(xt_register_table);
+
+void *xt_unregister_table(struct xt_table *table)
+{
+ struct xt_table_info *private;
+
+ mutex_lock(&xt[table->af].mutex);
+ private = table->private;
+ list_del(&table->list);
+ mutex_unlock(&xt[table->af].mutex);
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ kfree(table->ops);
+ kfree(table);
+
+ return private;
+}
+EXPORT_SYMBOL_GPL(xt_unregister_table);
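+
+/* ipt_register_table() calls the registration side with a zeroed
+ * bootstrap info that xt_replace_table() immediately swaps out again,
+ * roughly:
+ *
+ *	struct xt_table_info bootstrap = {0};
+ *	struct xt_table_info *newinfo = xt_alloc_table_info(repl->size);
+ *
+ *	... translate the userspace ruleset into newinfo ...
+ *	new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ *	if (IS_ERR(new_table))
+ *		xt_free_table_info(newinfo);
+ */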
+
+#ifdef CONFIG_PROC_FS
+static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
+ struct net *net = seq_file_net(seq);
+ struct xt_pernet *xt_net;
+
+ xt_net = net_generic(net, xt_pernet_id);
+
+ mutex_lock(&xt[af].mutex);
+ return seq_list_start(&xt_net->tables[af], *pos);
+}
+
+static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
+ struct net *net = seq_file_net(seq);
+ struct xt_pernet *xt_net;
+
+ xt_net = net_generic(net, xt_pernet_id);
+
+ return seq_list_next(v, &xt_net->tables[af], pos);
+}
+
+static void xt_table_seq_stop(struct seq_file *seq, void *v)
+{
+ u_int8_t af = (unsigned long)pde_data(file_inode(seq->file));
+
+ mutex_unlock(&xt[af].mutex);
+}
+
+static int xt_table_seq_show(struct seq_file *seq, void *v)
+{
+ struct xt_table *table = list_entry(v, struct xt_table, list);
+
+ if (*table->name)
+ seq_printf(seq, "%s\n", table->name);
+ return 0;
+}
+
+static const struct seq_operations xt_table_seq_ops = {
+ .start = xt_table_seq_start,
+ .next = xt_table_seq_next,
+ .stop = xt_table_seq_stop,
+ .show = xt_table_seq_show,
+};
+
+/*
+ * Traversal state for the ip{,6}_tables_{matches,targets} proc files,
+ * used to help cross the multi-AF mutexes.
+ */
+struct nf_mttg_trav {
+ struct list_head *head, *curr;
+ uint8_t class;
+};
+
+enum {
+ MTTG_TRAV_INIT,
+ MTTG_TRAV_NFP_UNSPEC,
+ MTTG_TRAV_NFP_SPEC,
+ MTTG_TRAV_DONE,
+};
+
+static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
+ bool is_target)
+{
+ static const uint8_t next_class[] = {
+ [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
+ [MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
+ };
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
+ struct nf_mttg_trav *trav = seq->private;
+
+ if (ppos != NULL)
+ ++(*ppos);
+
+ switch (trav->class) {
+ case MTTG_TRAV_INIT:
+ trav->class = MTTG_TRAV_NFP_UNSPEC;
+ mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
+ trav->head = trav->curr = is_target ?
+ &xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
+ break;
+ case MTTG_TRAV_NFP_UNSPEC:
+ trav->curr = trav->curr->next;
+ if (trav->curr != trav->head)
+ break;
+ mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+ mutex_lock(&xt[nfproto].mutex);
+ trav->head = trav->curr = is_target ?
+ &xt[nfproto].target : &xt[nfproto].match;
+ trav->class = next_class[trav->class];
+ break;
+ case MTTG_TRAV_NFP_SPEC:
+ trav->curr = trav->curr->next;
+ if (trav->curr != trav->head)
+ break;
+ fallthrough;
+ default:
+ return NULL;
+ }
+ return trav;
+}
+
+static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
+ bool is_target)
+{
+ struct nf_mttg_trav *trav = seq->private;
+ unsigned int j;
+
+ trav->class = MTTG_TRAV_INIT;
+ for (j = 0; j < *pos; ++j)
+ if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
+ return NULL;
+ return trav;
+}
+
+static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
+{
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
+ struct nf_mttg_trav *trav = seq->private;
+
+ switch (trav->class) {
+ case MTTG_TRAV_NFP_UNSPEC:
+ mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+ break;
+ case MTTG_TRAV_NFP_SPEC:
+ mutex_unlock(&xt[nfproto].mutex);
+ break;
+ }
+}
+
+static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return xt_mttg_seq_start(seq, pos, false);
+}
+
+static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+ return xt_mttg_seq_next(seq, v, ppos, false);
+}
+
+static int xt_match_seq_show(struct seq_file *seq, void *v)
+{
+ const struct nf_mttg_trav *trav = seq->private;
+ const struct xt_match *match;
+
+ switch (trav->class) {
+ case MTTG_TRAV_NFP_UNSPEC:
+ case MTTG_TRAV_NFP_SPEC:
+ if (trav->curr == trav->head)
+ return 0;
+ match = list_entry(trav->curr, struct xt_match, list);
+ if (*match->name)
+ seq_printf(seq, "%s\n", match->name);
+ }
+ return 0;
+}
+
+static const struct seq_operations xt_match_seq_ops = {
+ .start = xt_match_seq_start,
+ .next = xt_match_seq_next,
+ .stop = xt_mttg_seq_stop,
+ .show = xt_match_seq_show,
+};
+
+static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return xt_mttg_seq_start(seq, pos, true);
+}
+
+static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+ return xt_mttg_seq_next(seq, v, ppos, true);
+}
+
+static int xt_target_seq_show(struct seq_file *seq, void *v)
+{
+ const struct nf_mttg_trav *trav = seq->private;
+ const struct xt_target *target;
+
+ switch (trav->class) {
+ case MTTG_TRAV_NFP_UNSPEC:
+ case MTTG_TRAV_NFP_SPEC:
+ if (trav->curr == trav->head)
+ return 0;
+ target = list_entry(trav->curr, struct xt_target, list);
+ if (*target->name)
+ seq_printf(seq, "%s\n", target->name);
+ }
+ return 0;
+}
+
+static const struct seq_operations xt_target_seq_ops = {
+ .start = xt_target_seq_start,
+ .next = xt_target_seq_next,
+ .stop = xt_mttg_seq_stop,
+ .show = xt_target_seq_show,
+};
+
+#define FORMAT_TABLES "_tables_names"
+#define FORMAT_MATCHES "_tables_matches"
+#define FORMAT_TARGETS "_tables_targets"
+
+#endif /* CONFIG_PROC_FS */
+
+/**
+ * xt_hook_ops_alloc - set up hooks for a new table
+ * @table: table with metadata needed to set up hooks
+ * @fn: Hook function
+ *
+ * This function will create the nf_hook_ops that the x_table needs
+ * to hand to the hook registration code (e.g. nf_register_net_hooks()).
+ */
+struct nf_hook_ops *
+xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
+{
+ unsigned int hook_mask = table->valid_hooks;
+ uint8_t i, num_hooks = hweight32(hook_mask);
+ uint8_t hooknum;
+ struct nf_hook_ops *ops;
+
+ if (!num_hooks)
+ return ERR_PTR(-EINVAL);
+
+ ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
+ if (ops == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
+ hook_mask >>= 1, ++hooknum) {
+ if (!(hook_mask & 1))
+ continue;
+ ops[i].hook = fn;
+ ops[i].pf = table->af;
+ ops[i].hooknum = hooknum;
+ ops[i].priority = table->priority;
+ ++i;
+ }
+
+ return ops;
+}
+EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
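+
+/* A table module typically does this once at module init and keeps the
+ * result for hook (un)registration; iptable_filter, for example:
+ *
+ *	filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
+ *	if (IS_ERR(filter_ops))
+ *		return PTR_ERR(filter_ops);
+ */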
+
+int xt_register_template(const struct xt_table *table,
+ int (*table_init)(struct net *net))
+{
+ int ret = -EEXIST, af = table->af;
+ struct xt_template *t;
+
+ mutex_lock(&xt[af].mutex);
+
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0))
+ goto out_unlock;
+ }
+
+ ret = -ENOMEM;
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ goto out_unlock;
+
+ BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name));
+
+ strscpy(t->name, table->name, sizeof(t->name));
+ t->table_init = table_init;
+ t->me = table->me;
+ list_add(&t->list, &xt_templates[af]);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&xt[af].mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xt_register_template);
+
+void xt_unregister_template(const struct xt_table *table)
+{
+ struct xt_template *t;
+ int af = table->af;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (strcmp(table->name, t->name))
+ continue;
+
+ list_del(&t->list);
+ mutex_unlock(&xt[af].mutex);
+ kfree(t);
+ return;
+ }
+
+ mutex_unlock(&xt[af].mutex);
+ WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(xt_unregister_template);
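+
+/* Registering a template at module init is what lets the larval lookup
+ * in xt_find_table_lock() instantiate the table on demand in another
+ * netns, e.g. in iptable_filter:
+ *
+ *	ret = xt_register_template(&packet_filter,
+ *				   iptable_filter_table_init);
+ */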
+
+int xt_proto_init(struct net *net, u_int8_t af)
+{
+#ifdef CONFIG_PROC_FS
+ char buf[XT_FUNCTION_MAXNAMELEN];
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+#endif
+
+ if (af >= ARRAY_SIZE(xt_prefix))
+ return -EINVAL;
+
+#ifdef CONFIG_PROC_FS
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+ proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
+ sizeof(struct seq_net_private),
+ (void *)(unsigned long)af);
+ if (!proc)
+ goto out;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+ proc = proc_create_seq_private(buf, 0440, net->proc_net,
+ &xt_match_seq_ops, sizeof(struct nf_mttg_trav),
+ (void *)(unsigned long)af);
+ if (!proc)
+ goto out_remove_tables;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+ proc = proc_create_seq_private(buf, 0440, net->proc_net,
+ &xt_target_seq_ops, sizeof(struct nf_mttg_trav),
+ (void *)(unsigned long)af);
+ if (!proc)
+ goto out_remove_matches;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
+#endif
+
+ return 0;
+
+#ifdef CONFIG_PROC_FS
+out_remove_matches:
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+ remove_proc_entry(buf, net->proc_net);
+
+out_remove_tables:
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+ remove_proc_entry(buf, net->proc_net);
+out:
+ return -1;
+#endif
+}
+EXPORT_SYMBOL_GPL(xt_proto_init);
+
+void xt_proto_fini(struct net *net, u_int8_t af)
+{
+#ifdef CONFIG_PROC_FS
+ char buf[XT_FUNCTION_MAXNAMELEN];
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+ remove_proc_entry(buf, net->proc_net);
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+ remove_proc_entry(buf, net->proc_net);
+
+ strscpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+ remove_proc_entry(buf, net->proc_net);
+#endif /*CONFIG_PROC_FS*/
+}
+EXPORT_SYMBOL_GPL(xt_proto_fini);
+
+/**
+ * xt_percpu_counter_alloc - allocate x_tables rule counter
+ *
+ * @state: pointer to xt_percpu allocation state
+ * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
+ *
+ * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
+ * contain the address of the real (percpu) counter.
+ *
+ * Rule evaluation needs to use xt_get_this_cpu_counter() helper
+ * to fetch the real percpu counter.
+ *
+ * To speed up allocation and improve data locality, a 4KB block is
+ * allocated. Freeing any counter may free an entire block, so all
+ * counters allocated using the same state must be freed at the same
+ * time.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated page and the current sub-offset.
+ *
+ * returns false on error.
+ */
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+ struct xt_counters *counter)
+{
+ BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
+
+ if (nr_cpu_ids <= 1)
+ return true;
+
+ if (!state->mem) {
+ state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+ XT_PCPU_BLOCK_SIZE);
+ if (!state->mem)
+ return false;
+ }
+ counter->pcnt = (__force unsigned long)(state->mem + state->off);
+ state->off += sizeof(*counter);
+ if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+ state->mem = NULL;
+ state->off = 0;
+ }
+ return true;
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
+
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+ unsigned long pcnt = counters->pcnt;
+
+ if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
+ free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
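+
+/* Rule translation threads one allocation state through all entries so
+ * counters get packed into the 4KB blocks, roughly (sketch based on the
+ * ip_tables translate_table()/find_check_entry() pair):
+ *
+ *	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
+ *
+ *	xt_entry_foreach(iter, entry0, newinfo->size) {
+ *		if (!xt_percpu_counter_alloc(&alloc_state, &iter->counters))
+ *			return -ENOMEM;
+ *		...
+ *	}
+ */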
+
+static int __net_init xt_net_init(struct net *net)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ INIT_LIST_HEAD(&xt_net->tables[i]);
+ return 0;
+}
+
+static void __net_exit xt_net_exit(struct net *net)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ WARN_ON_ONCE(!list_empty(&xt_net->tables[i]));
+}
+
+static struct pernet_operations xt_net_ops = {
+ .init = xt_net_init,
+ .exit = xt_net_exit,
+ .id = &xt_pernet_id,
+ .size = sizeof(struct xt_pernet),
+};
+
+static int __init xt_init(void)
+{
+ unsigned int i;
+ int rv;
+
+ for_each_possible_cpu(i) {
+ seqcount_init(&per_cpu(xt_recseq, i));
+ }
+
+ xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+ if (!xt)
+ return -ENOMEM;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ mutex_init(&xt[i].mutex);
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+ mutex_init(&xt[i].compat_mutex);
+ xt[i].compat_tab = NULL;
+#endif
+ INIT_LIST_HEAD(&xt[i].target);
+ INIT_LIST_HEAD(&xt[i].match);
+ INIT_LIST_HEAD(&xt_templates[i]);
+ }
+ rv = register_pernet_subsys(&xt_net_ops);
+ if (rv < 0)
+ kfree(xt);
+ return rv;
+}
+
+static void __exit xt_fini(void)
+{
+ unregister_pernet_subsys(&xt_net_ops);
+ kfree(xt);
+}
+
+module_init(xt_init);
+module_exit(xt_fini);
+
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 000000000..b6a015aee
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph);
+ if (!ih)
+ return false;
+
+ audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu",
+ &ih->saddr, &ih->daddr, ih->protocol);
+
+ return true;
+}
+
+static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+ if (!ih)
+ return false;
+
+ nexthdr = ih->nexthdr;
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off);
+
+ audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+ &ih->saddr, &ih->daddr, nexthdr);
+
+ return true;
+}
+
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct audit_buffer *ab;
+ int fam = -1;
+
+ if (audit_enabled == AUDIT_OFF)
+ goto errout;
+ ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ if (ab == NULL)
+ goto errout;
+
+ audit_log_format(ab, "mark=%#x", skb->mark);
+
+ switch (xt_family(par)) {
+ case NFPROTO_BRIDGE:
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case htons(ETH_P_IPV6):
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+ break;
+ case NFPROTO_IPV4:
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case NFPROTO_IPV6:
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+
+ if (fam == -1)
+ audit_log_format(ab, " saddr=? daddr=? proto=-1");
+
+ audit_log_end(ab);
+
+errout:
+ return XT_CONTINUE;
+}
+
+static unsigned int
+audit_tg_ebt(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ audit_tg(skb, par);
+ return EBT_CONTINUE;
+}
+
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_audit_info *info = par->targinfo;
+
+ if (info->type > XT_AUDIT_TYPE_MAX) {
+ pr_info_ratelimited("Audit type out of range (valid range: 0..%u)\n",
+ XT_AUDIT_TYPE_MAX);
+ return -ERANGE;
+ }
+
+ return 0;
+}
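+
+/* From userspace the target attaches like any other; note that audit_tg()
+ * above never reads info->type, so --type is validated here only for
+ * compatibility, e.g.:
+ *
+ *	iptables -A INPUT -m state --state NEW -j AUDIT --type accept
+ */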
+
+static struct xt_target audit_tg_reg[] __read_mostly = {
+ {
+ .name = "AUDIT",
+ .family = NFPROTO_UNSPEC,
+ .target = audit_tg,
+ .targetsize = sizeof(struct xt_audit_info),
+ .checkentry = audit_tg_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "AUDIT",
+ .family = NFPROTO_BRIDGE,
+ .target = audit_tg_ebt,
+ .targetsize = sizeof(struct xt_audit_info),
+ .checkentry = audit_tg_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init audit_tg_init(void)
+{
+ return xt_register_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
+}
+
+static void __exit audit_tg_exit(void)
+{
+ xt_unregister_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
new file mode 100644
index 000000000..c8a639f56
--- /dev/null
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* iptables module for the packet checksum mangling
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * (C) 2010 Red Hat, Inc.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CHECKSUM.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
+MODULE_DESCRIPTION("Xtables: checksum modification");
+MODULE_ALIAS("ipt_CHECKSUM");
+MODULE_ALIAS("ip6t_CHECKSUM");
+
+static unsigned int
+checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ if (skb->ip_summed == CHECKSUM_PARTIAL && !skb_is_gso(skb))
+ skb_checksum_help(skb);
+
+ return XT_CONTINUE;
+}
+
+static int checksum_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_CHECKSUM_info *einfo = par->targinfo;
+ const struct ip6t_ip6 *i6 = par->entryinfo;
+ const struct ipt_ip *i4 = par->entryinfo;
+
+ if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
+ pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
+ einfo->operation);
+ return -EINVAL;
+ }
+ if (!einfo->operation)
+ return -EINVAL;
+
+ switch (par->family) {
+ case NFPROTO_IPV4:
+ if (i4->proto == IPPROTO_UDP &&
+ (i4->invflags & XT_INV_PROTO) == 0)
+ return 0;
+ break;
+ case NFPROTO_IPV6:
+ if ((i6->flags & IP6T_F_PROTO) &&
+ i6->proto == IPPROTO_UDP &&
+ (i6->invflags & XT_INV_PROTO) == 0)
+ return 0;
+ break;
+ }
+
+ pr_warn_once("CHECKSUM should be avoided. If really needed, restrict with \"-p udp\" and only use in OUTPUT\n");
+ return 0;
+}
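+
+/* A rule of the shape the warning above asks for, restricted to UDP in
+ * the OUTPUT chain (DHCP port chosen purely as an illustration):
+ *
+ *	iptables -t mangle -A OUTPUT -p udp --dport 68 \
+ *		 -j CHECKSUM --checksum-fill
+ */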
+
+static struct xt_target checksum_tg_reg __read_mostly = {
+ .name = "CHECKSUM",
+ .family = NFPROTO_UNSPEC,
+ .target = checksum_tg,
+ .targetsize = sizeof(struct xt_CHECKSUM_info),
+ .table = "mangle",
+ .checkentry = checksum_tg_check,
+ .me = THIS_MODULE,
+};
+
+static int __init checksum_tg_init(void)
+{
+ return xt_register_target(&checksum_tg_reg);
+}
+
+static void __exit checksum_tg_exit(void)
+{
+ xt_unregister_target(&checksum_tg_reg);
+}
+
+module_init(checksum_tg_init);
+module_exit(checksum_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
new file mode 100644
index 000000000..0accac98d
--- /dev/null
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is a module which is used for setting the skb->priority field
+ * of an skb for qdisc classification.
+ */
+
+/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Qdisc classification");
+MODULE_ALIAS("ipt_CLASSIFY");
+MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
+
+static unsigned int
+classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_classify_target_info *clinfo = par->targinfo;
+
+ skb->priority = clinfo->priority;
+ return XT_CONTINUE;
+}
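+
+/* skb->priority doubles as a tc classid: the 32-bit value packs a
+ * major:minor handle as (major << 16) | minor.  For example (interface
+ * and class names illustrative):
+ *
+ *	iptables -t mangle -A POSTROUTING -o eth0 \
+ *		 -j CLASSIFY --set-class 1:10
+ *
+ * steers matching packets into qdisc class 1:10 on eth0.
+ */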
+
+static struct xt_target classify_tg_reg[] __read_mostly = {
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_POST_ROUTING),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_ARP,
+ .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init classify_tg_init(void)
+{
+ return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
+}
+
+static void __exit classify_tg_exit(void)
+{
+ xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
+}
+
+module_init(classify_tg_init);
+module_exit(classify_tg_exit);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
new file mode 100644
index 000000000..76acecf3e
--- /dev/null
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This module is used to copy security markings from packets
+ * to connections, and restore security markings from connections
+ * back to packets. This would normally be performed in conjunction
+ * with the SECMARK target and state match.
+ *
+ * Based somewhat on CONNMARK:
+ * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CONNSECMARK.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
+MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
+MODULE_ALIAS("ipt_CONNSECMARK");
+MODULE_ALIAS("ip6t_CONNSECMARK");
+
+/*
+ * If the packet has a security mark and the connection does not, copy
+ * the security mark from the packet to the connection.
+ */
+static void secmark_save(const struct sk_buff *skb)
+{
+ if (skb->secmark) {
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && !ct->secmark) {
+ ct->secmark = skb->secmark;
+ nf_conntrack_event_cache(IPCT_SECMARK, ct);
+ }
+ }
+}
+
+/*
+ * If packet has no security mark, and the connection does, restore the
+ * security mark from the connection to the packet.
+ */
+static void secmark_restore(struct sk_buff *skb)
+{
+ if (!skb->secmark) {
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && ct->secmark)
+ skb->secmark = ct->secmark;
+ }
+}
+
+static unsigned int
+connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connsecmark_target_info *info = par->targinfo;
+
+ switch (info->mode) {
+ case CONNSECMARK_SAVE:
+ secmark_save(skb);
+ break;
+
+ case CONNSECMARK_RESTORE:
+ secmark_restore(skb);
+ break;
+
+ default:
+ BUG();
+ }
+
+ return XT_CONTINUE;
+}
+
+static int connsecmark_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_connsecmark_target_info *info = par->targinfo;
+ int ret;
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "security") != 0) {
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ par->table);
+ return -EINVAL;
+ }
+
+ switch (info->mode) {
+ case CONNSECMARK_SAVE:
+ case CONNSECMARK_RESTORE:
+ break;
+
+ default:
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
+ return -EINVAL;
+ }
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_target connsecmark_tg_reg __read_mostly = {
+ .name = "CONNSECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connsecmark_tg_check,
+ .destroy = connsecmark_tg_destroy,
+ .target = connsecmark_tg,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .me = THIS_MODULE,
+};
+
+static int __init connsecmark_tg_init(void)
+{
+ return xt_register_target(&connsecmark_tg_reg);
+}
+
+static void __exit connsecmark_tg_exit(void)
+{
+ xt_unregister_target(&connsecmark_tg_reg);
+}
+
+module_init(connsecmark_tg_init);
+module_exit(connsecmark_tg_exit);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
new file mode 100644
index 000000000..2be2f7a7b
--- /dev/null
+++ b/net/netfilter/xt_CT.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2010 Patrick McHardy <kaber@trash.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CT.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
+{
+ /* Previously seen (loopback)? Ignore. */
+ if (skb->_nfct != 0)
+ return XT_CONTINUE;
+
+ if (ct) {
+ refcount_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+ } else {
+ nf_ct_set(skb, ct, IP_CT_UNTRACKED);
+ }
+
+ return XT_CONTINUE;
+}
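+
+/* The template conntrack (or NULL for NOTRACK semantics) prepared at
+ * checkentry time is attached here to the first packet of a flow.  A
+ * typical rule instantiating such a template (protocol, port and helper
+ * purely illustrative):
+ *
+ *	iptables -t raw -A PREROUTING -p udp --dport 5060 \
+ *		 -j CT --helper sip
+ */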
+
+static unsigned int xt_ct_target_v0(struct sk_buff *skb,
+ const struct xt_action_param *par)
+{
+ const struct xt_ct_target_info *info = par->targinfo;
+ struct nf_conn *ct = info->ct;
+
+ return xt_ct_target(skb, ct);
+}
+
+static unsigned int xt_ct_target_v1(struct sk_buff *skb,
+ const struct xt_action_param *par)
+{
+ const struct xt_ct_target_info_v1 *info = par->targinfo;
+ struct nf_conn *ct = info->ct;
+
+ return xt_ct_target(skb, ct);
+}
+
+static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
+{
+ if (par->family == NFPROTO_IPV4) {
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.invflags & IPT_INV_PROTO)
+ return 0;
+ return e->ip.proto;
+ } else if (par->family == NFPROTO_IPV6) {
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (e->ipv6.invflags & IP6T_INV_PROTO)
+ return 0;
+ return e->ipv6.proto;
+ } else
+ return 0;
+}
+
+static int
+xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
+ const struct xt_tgchk_param *par)
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help;
+ u8 proto;
+
+ proto = xt_ct_find_proto(par);
+ if (!proto) {
+		pr_info_ratelimited("You must specify an L4 protocol and not use inversions on it\n");
+ return -ENOENT;
+ }
+
+ helper = nf_conntrack_helper_try_module_get(helper_name, par->family,
+ proto);
+ if (helper == NULL) {
+ pr_info_ratelimited("No such helper \"%s\"\n", helper_name);
+ return -ENOENT;
+ }
+
+ help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+ if (help == NULL) {
+ nf_conntrack_helper_put(helper);
+ return -ENOMEM;
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+ return 0;
+}
+
+static int
+xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
+ const char *timeout_name)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ const struct nf_conntrack_l4proto *l4proto;
+ u8 proto;
+
+ proto = xt_ct_find_proto(par);
+ if (!proto) {
+		pr_info_ratelimited("You must specify an L4 protocol and not use inversions on it\n");
+ return -EINVAL;
+ }
+ l4proto = nf_ct_l4proto_find(proto);
+ return nf_ct_set_timeout(par->net, ct, par->family, l4proto->l4proto,
+ timeout_name);
+
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
+{
+ switch (info->flags & (XT_CT_ZONE_DIR_ORIG |
+ XT_CT_ZONE_DIR_REPL)) {
+ case XT_CT_ZONE_DIR_ORIG:
+ return NF_CT_ZONE_DIR_ORIG;
+ case XT_CT_ZONE_DIR_REPL:
+ return NF_CT_ZONE_DIR_REPL;
+ default:
+ return NF_CT_DEFAULT_ZONE_DIR;
+ }
+}
+
+static void xt_ct_put_helper(struct nf_conn_help *help)
+{
+ struct nf_conntrack_helper *helper;
+
+ if (!help)
+ return;
+
+ /* not yet exposed to other cpus, or ruleset
+ * already detached (post-replacement).
+ */
+ helper = rcu_dereference_raw(help->helper);
+ if (helper)
+ nf_conntrack_helper_put(helper);
+}
+
+static int xt_ct_tg_check(const struct xt_tgchk_param *par,
+ struct xt_ct_target_info_v1 *info)
+{
+ struct nf_conntrack_zone zone;
+ struct nf_conn_help *help;
+ struct nf_conn *ct;
+ int ret = -EOPNOTSUPP;
+
+ if (info->flags & XT_CT_NOTRACK) {
+ ct = NULL;
+ goto out;
+ }
+
+#ifndef CONFIG_NF_CONNTRACK_ZONES
+ if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
+ XT_CT_ZONE_DIR_REPL |
+ XT_CT_ZONE_MARK))
+ goto err1;
+#endif
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ goto err1;
+
+ memset(&zone, 0, sizeof(zone));
+ zone.id = info->zone;
+ zone.dir = xt_ct_flags_to_dir(info);
+ if (info->flags & XT_CT_ZONE_MARK)
+ zone.flags |= NF_CT_FLAG_MARK;
+
+ ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
+ if (!ct) {
+ ret = -ENOMEM;
+ goto err2;
+ }
+
+ if ((info->ct_events || info->exp_events) &&
+ !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
+ GFP_KERNEL)) {
+ ret = -EINVAL;
+ goto err3;
+ }
+
+ if (info->helper[0]) {
+ if (strnlen(info->helper, sizeof(info->helper)) == sizeof(info->helper)) {
+ ret = -ENAMETOOLONG;
+ goto err3;
+ }
+
+ ret = xt_ct_set_helper(ct, info->helper, par);
+ if (ret < 0)
+ goto err3;
+ }
+
+ if (info->timeout[0]) {
+ if (strnlen(info->timeout, sizeof(info->timeout)) == sizeof(info->timeout)) {
+ ret = -ENAMETOOLONG;
+ goto err4;
+ }
+
+ ret = xt_ct_set_timeout(ct, par, info->timeout);
+ if (ret < 0)
+ goto err4;
+ }
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+out:
+ info->ct = ct;
+ return 0;
+
+err4:
+ help = nfct_help(ct);
+ xt_ct_put_helper(help);
+err3:
+ nf_ct_tmpl_free(ct);
+err2:
+ nf_ct_netns_put(par->net, par->family);
+err1:
+ return ret;
+}
+
+static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
+{
+ struct xt_ct_target_info *info = par->targinfo;
+ struct xt_ct_target_info_v1 info_v1 = {
+ .flags = info->flags,
+ .zone = info->zone,
+ .ct_events = info->ct_events,
+ .exp_events = info->exp_events,
+ };
+ int ret;
+
+ if (info->flags & ~XT_CT_NOTRACK)
+ return -EINVAL;
+
+ memcpy(info_v1.helper, info->helper, sizeof(info->helper));
+
+ ret = xt_ct_tg_check(par, &info_v1);
+ if (ret < 0)
+ return ret;
+
+ info->ct = info_v1.ct;
+
+ return ret;
+}
+
+static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ struct xt_ct_target_info_v1 *info = par->targinfo;
+
+ if (info->flags & ~XT_CT_NOTRACK)
+ return -EINVAL;
+
+ return xt_ct_tg_check(par, par->targinfo);
+}
+
+static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par)
+{
+ struct xt_ct_target_info_v1 *info = par->targinfo;
+
+ if (info->flags & ~XT_CT_MASK)
+ return -EINVAL;
+
+ return xt_ct_tg_check(par, par->targinfo);
+}
+
+static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
+ struct xt_ct_target_info_v1 *info)
+{
+ struct nf_conn *ct = info->ct;
+ struct nf_conn_help *help;
+
+ if (ct) {
+ help = nfct_help(ct);
+ xt_ct_put_helper(help);
+
+ nf_ct_netns_put(par->net, par->family);
+
+ nf_ct_destroy_timeout(ct);
+ nf_ct_put(info->ct);
+ }
+}
+
+static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
+{
+ struct xt_ct_target_info *info = par->targinfo;
+ struct xt_ct_target_info_v1 info_v1 = {
+ .flags = info->flags,
+ .zone = info->zone,
+ .ct_events = info->ct_events,
+ .exp_events = info->exp_events,
+ .ct = info->ct,
+ };
+ memcpy(info_v1.helper, info->helper, sizeof(info->helper));
+
+ xt_ct_tg_destroy(par, &info_v1);
+}
+
+static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
+{
+ xt_ct_tg_destroy(par, par->targinfo);
+}
+
+static struct xt_target xt_ct_tg_reg[] __read_mostly = {
+ {
+ .name = "CT",
+ .family = NFPROTO_UNSPEC,
+ .targetsize = sizeof(struct xt_ct_target_info),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v0,
+ .destroy = xt_ct_tg_destroy_v0,
+ .target = xt_ct_target_v0,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CT",
+ .family = NFPROTO_UNSPEC,
+ .revision = 1,
+ .targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v1,
+ .destroy = xt_ct_tg_destroy_v1,
+ .target = xt_ct_target_v1,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CT",
+ .family = NFPROTO_UNSPEC,
+ .revision = 2,
+ .targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v2,
+ .destroy = xt_ct_tg_destroy_v1,
+ .target = xt_ct_target_v1,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+};
+
+static unsigned int
+notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ /* Previously seen (loopback)? Ignore. */
+ if (skb->_nfct != 0)
+ return XT_CONTINUE;
+
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+ return XT_CONTINUE;
+}
+
+static struct xt_target notrack_tg_reg __read_mostly = {
+ .name = "NOTRACK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = notrack_tg,
+ .table = "raw",
+ .me = THIS_MODULE,
+};
+
+static int __init xt_ct_tg_init(void)
+{
+ int ret;
+
+ ret = xt_register_target(&notrack_tg_reg);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+ if (ret < 0) {
+ xt_unregister_target(&notrack_tg_reg);
+ return ret;
+ }
+ return 0;
+}
+
+static void __exit xt_ct_tg_exit(void)
+{
+ xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
+ xt_unregister_target(&notrack_tg_reg);
+}
+
+module_init(xt_ct_tg_init);
+module_exit(xt_ct_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: connection tracking target");
+MODULE_ALIAS("ipt_CT");
+MODULE_ALIAS("ip6t_CT");
+MODULE_ALIAS("ipt_NOTRACK");
+MODULE_ALIAS("ip6t_NOTRACK");
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 000000000..cfa44515a
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_DSCP");
+MODULE_ALIAS("ip6t_DSCP");
+MODULE_ALIAS("ipt_TOS");
+MODULE_ALIAS("ip6t_TOS");
+
+#define XT_DSCP_ECN_MASK 3u
+
+static unsigned int
+dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_DSCP_info *dinfo = par->targinfo;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
+
+ if (dscp != dinfo->dscp) {
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
+ return NF_DROP;
+
+ ipv4_change_dsfield(ip_hdr(skb), XT_DSCP_ECN_MASK,
+ dinfo->dscp << XT_DSCP_SHIFT);
+
+ }
+ return XT_CONTINUE;
+}
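+
+/* The DSCP value occupies the upper six bits of the IPv4/IPv6 traffic
+ * class octet (XT_DSCP_SHIFT == 2); the low two ECN bits are preserved
+ * via XT_DSCP_ECN_MASK.  E.g. setting DSCP 46 (EF) writes 46 << 2 == 0xb8
+ * into the dsfield while leaving ECN untouched.
+ */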
+
+static unsigned int
+dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_DSCP_info *dinfo = par->targinfo;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
+
+ if (dscp != dinfo->dscp) {
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
+ return NF_DROP;
+
+ ipv6_change_dsfield(ipv6_hdr(skb), XT_DSCP_ECN_MASK,
+ dinfo->dscp << XT_DSCP_SHIFT);
+ }
+ return XT_CONTINUE;
+}
+
+static int dscp_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_DSCP_info *info = par->targinfo;
+
+ if (info->dscp > XT_DSCP_MAX)
+ return -EDOM;
+ return 0;
+}
+
+static unsigned int
+tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tos_target_info *info = par->targinfo;
+ struct iphdr *iph = ip_hdr(skb);
+ u_int8_t orig, nv;
+
+ orig = ipv4_get_dsfield(iph);
+ nv = (orig & ~info->tos_mask) ^ info->tos_value;
+
+ if (orig != nv) {
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
+ return NF_DROP;
+ iph = ip_hdr(skb);
+ ipv4_change_dsfield(iph, 0, nv);
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int
+tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tos_target_info *info = par->targinfo;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ u_int8_t orig, nv;
+
+ orig = ipv6_get_dsfield(iph);
+ nv = (orig & ~info->tos_mask) ^ info->tos_value;
+
+ if (orig != nv) {
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
+ return NF_DROP;
+ iph = ipv6_hdr(skb);
+ ipv6_change_dsfield(iph, 0, nv);
+ }
+
+ return XT_CONTINUE;
+}
+
+static struct xt_target dscp_tg_reg[] __read_mostly = {
+ {
+ .name = "DSCP",
+ .family = NFPROTO_IPV4,
+ .checkentry = dscp_tg_check,
+ .target = dscp_tg,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DSCP",
+ .family = NFPROTO_IPV6,
+ .checkentry = dscp_tg_check,
+ .target = dscp_tg6,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TOS",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tos_tg,
+ .targetsize = sizeof(struct xt_tos_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TOS",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .table = "mangle",
+ .target = tos_tg6,
+ .targetsize = sizeof(struct xt_tos_target_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init dscp_tg_init(void)
+{
+ return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
+}
+
+static void __exit dscp_tg_exit(void)
+{
+ xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
+}
+
+module_init(dscp_tg_init);
+module_exit(dscp_tg_exit);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
new file mode 100644
index 000000000..7873b834c
--- /dev/null
+++ b/net/netfilter/xt_HL.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * TTL modification target for IP tables
+ * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * Hop Limit modification target for ip6tables
+ * Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_TTL.h>
+#include <linux/netfilter_ipv6/ip6t_HL.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
+MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
+MODULE_LICENSE("GPL");
+
+static unsigned int
+ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct iphdr *iph;
+ const struct ipt_TTL_info *info = par->targinfo;
+ int new_ttl;
+
+ if (skb_ensure_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ iph = ip_hdr(skb);
+
+ switch (info->mode) {
+ case IPT_TTL_SET:
+ new_ttl = info->ttl;
+ break;
+ case IPT_TTL_INC:
+ new_ttl = iph->ttl + info->ttl;
+ if (new_ttl > 255)
+ new_ttl = 255;
+ break;
+ case IPT_TTL_DEC:
+ new_ttl = iph->ttl - info->ttl;
+ if (new_ttl < 0)
+ new_ttl = 0;
+ break;
+ default:
+ new_ttl = iph->ttl;
+ break;
+ }
+
+ if (new_ttl != iph->ttl) {
+ csum_replace2(&iph->check, htons(iph->ttl << 8),
+ htons(new_ttl << 8));
+ iph->ttl = new_ttl;
+ }
+
+ return XT_CONTINUE;
+}
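+
+/* Only the TTL byte changes, so the IPv4 header checksum is updated
+ * incrementally (RFC 1624) rather than recomputed: csum_replace2() folds
+ * htons(old_ttl << 8) out of and htons(new_ttl << 8) into iph->check;
+ * the protocol byte sharing that 16-bit word cancels out.  IPv6 (below)
+ * has no header checksum, so plain assignment suffices there.
+ */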
+
+static unsigned int
+hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct ipv6hdr *ip6h;
+ const struct ip6t_HL_info *info = par->targinfo;
+ int new_hl;
+
+ if (skb_ensure_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ ip6h = ipv6_hdr(skb);
+
+ switch (info->mode) {
+ case IP6T_HL_SET:
+ new_hl = info->hop_limit;
+ break;
+ case IP6T_HL_INC:
+ new_hl = ip6h->hop_limit + info->hop_limit;
+ if (new_hl > 255)
+ new_hl = 255;
+ break;
+ case IP6T_HL_DEC:
+ new_hl = ip6h->hop_limit - info->hop_limit;
+ if (new_hl < 0)
+ new_hl = 0;
+ break;
+ default:
+ new_hl = ip6h->hop_limit;
+ break;
+ }
+
+ ip6h->hop_limit = new_hl;
+
+ return XT_CONTINUE;
+}
+
+static int ttl_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_TTL_info *info = par->targinfo;
+
+ if (info->mode > IPT_TTL_MAXMODE)
+ return -EINVAL;
+ if (info->mode != IPT_TTL_SET && info->ttl == 0)
+ return -EINVAL;
+ return 0;
+}
+
+static int hl_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_HL_info *info = par->targinfo;
+
+ if (info->mode > IP6T_HL_MAXMODE)
+ return -EINVAL;
+ if (info->mode != IP6T_HL_SET && info->hop_limit == 0)
+ return -EINVAL;
+ return 0;
+}
+
+static struct xt_target hl_tg_reg[] __read_mostly = {
+ {
+ .name = "TTL",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .target = ttl_tg,
+ .targetsize = sizeof(struct ipt_TTL_info),
+ .table = "mangle",
+ .checkentry = ttl_tg_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "HL",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .target = hl_tg6,
+ .targetsize = sizeof(struct ip6t_HL_info),
+ .table = "mangle",
+ .checkentry = hl_tg6_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init hl_tg_init(void)
+{
+ return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
+}
+
+static void __exit hl_tg_exit(void)
+{
+ xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
+}
+
+module_init(hl_tg_init);
+module_exit(hl_tg_exit);
+MODULE_ALIAS("ipt_TTL");
+MODULE_ALIAS("ip6t_HL");
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
new file mode 100644
index 000000000..8928ec56c
--- /dev/null
+++ b/net/netfilter/xt_HMARK.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_HMARK - Netfilter module to set mark by means of hashing
+ *
+ * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_HMARK.h>
+
+#include <net/ip.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
+MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
+MODULE_ALIAS("ipt_HMARK");
+MODULE_ALIAS("ip6t_HMARK");
+
+struct hmark_tuple {
+ __be32 src;
+ __be32 dst;
+ union hmark_ports uports;
+ u8 proto;
+};
+
+static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
+{
+ return (addr32[0] & mask[0]) ^
+ (addr32[1] & mask[1]) ^
+ (addr32[2] & mask[2]) ^
+ (addr32[3] & mask[3]);
+}
+
+static inline __be32
+hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
+{
+ switch (l3num) {
+ case AF_INET:
+ return *addr32 & *mask;
+ case AF_INET6:
+ return hmark_addr6_mask(addr32, mask);
+ }
+ return 0;
+}
+
+static inline void hmark_swap_ports(union hmark_ports *uports,
+ const struct xt_hmark_info *info)
+{
+ union hmark_ports hp;
+ u16 src, dst;
+
+ hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
+ src = ntohs(hp.b16.src);
+ dst = ntohs(hp.b16.dst);
+
+ if (dst > src)
+ uports->v32 = (dst << 16) | src;
+ else
+ uports->v32 = (src << 16) | dst;
+}
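+
+/* Ports are first masked/forced per the rule options, then ordered so
+ * that (sport, dport) and (dport, sport) yield the same 32-bit value:
+ * both directions of a flow hash to the same mark.
+ */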
+
+static int
+hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
+ const struct xt_hmark_info *info)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conntrack_tuple *otuple;
+ struct nf_conntrack_tuple *rtuple;
+
+ if (ct == NULL)
+ return -1;
+
+ otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
+ info->src_mask.ip6);
+ t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
+ info->dst_mask.ip6);
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+ return 0;
+
+ t->proto = nf_ct_protonum(ct);
+ if (t->proto != IPPROTO_ICMP) {
+ t->uports.b16.src = otuple->src.u.all;
+ t->uports.b16.dst = rtuple->src.u.all;
+ hmark_swap_ports(&t->uports, info);
+ }
+
+ return 0;
+#else
+ return -1;
+#endif
+}
+
+/* This hash function is endian-independent, to ensure consistent hashing
+ * if the cluster is composed of big- and little-endian systems. */
+static inline u32
+hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
+{
+ u32 hash;
+ u32 src = ntohl(t->src);
+ u32 dst = ntohl(t->dst);
+
+ if (dst < src)
+ swap(src, dst);
+
+ hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
+ hash = hash ^ (t->proto & info->proto_mask);
+
+ return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
+}
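+
+/* reciprocal_scale() maps the 32-bit hash onto [0, hmodulus) without a
+ * division, i.e. (u32)(((u64)hash * info->hmodulus) >> 32), so the final
+ * mark always lies in [hoffset, hoffset + hmodulus).
+ */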
+
+static void
+hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
+ struct hmark_tuple *t, const struct xt_hmark_info *info)
+{
+ int protoff;
+
+ protoff = proto_ports_offset(t->proto);
+ if (protoff < 0)
+ return;
+
+ nhoff += protoff;
+ if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
+ return;
+
+ hmark_swap_ports(&t->uports, info);
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
+{
+ struct icmp6hdr *icmp6h, _ih6;
+
+ icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
+ if (icmp6h == NULL)
+ return 0;
+
+ if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
+ *offset += sizeof(struct icmp6hdr);
+ return 1;
+ }
+ return 0;
+}
+
+static int
+hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
+ const struct xt_hmark_info *info)
+{
+ struct ipv6hdr *ip6, _ip6;
+ int flag = IP6_FH_F_AUTH;
+ unsigned int nhoff = 0;
+ u16 fragoff = 0;
+ int nexthdr;
+
+ ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
+ nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
+ if (nexthdr < 0)
+ return 0;
+ /* No need to check for icmp errors on fragments */
+ if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
+ goto noicmp;
+ /* Use inner header in case of ICMP errors */
+ if (get_inner6_hdr(skb, &nhoff)) {
+ ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
+ if (ip6 == NULL)
+ return -1;
+ /* If AH present, use SPI like in ESP. */
+ flag = IP6_FH_F_AUTH;
+ nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
+ if (nexthdr < 0)
+ return -1;
+ }
+noicmp:
+ t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
+ t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+ return 0;
+
+ t->proto = nexthdr;
+ if (t->proto == IPPROTO_ICMPV6)
+ return 0;
+
+ if (flag & IP6_FH_F_FRAG)
+ return 0;
+
+ hmark_set_tuple_ports(skb, nhoff, t, info);
+ return 0;
+}
+
+static unsigned int
+hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_hmark_info *info = par->targinfo;
+ struct hmark_tuple t;
+
+ memset(&t, 0, sizeof(struct hmark_tuple));
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
+ if (hmark_ct_set_htuple(skb, &t, info) < 0)
+ return XT_CONTINUE;
+ } else {
+ if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
+ return XT_CONTINUE;
+ }
+
+ skb->mark = hmark_hash(&t, info);
+ return XT_CONTINUE;
+}
+#endif
+
+static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
+{
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
+ if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
+ return 0;
+
+ /* Error message? */
+ if (!icmp_is_err(icmph->type))
+ return 0;
+
+ *nhoff += iphsz + sizeof(_ih);
+ return 1;
+}
+
+static int
+hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
+ const struct xt_hmark_info *info)
+{
+ struct iphdr *ip, _ip;
+ int nhoff = skb_network_offset(skb);
+
+ ip = (struct iphdr *) (skb->data + nhoff);
+ if (ip->protocol == IPPROTO_ICMP) {
+ /* Use inner header in case of ICMP errors */
+ if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
+ ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
+ if (ip == NULL)
+ return -1;
+ }
+ }
+
+ t->src = ip->saddr & info->src_mask.ip;
+ t->dst = ip->daddr & info->dst_mask.ip;
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+ return 0;
+
+ t->proto = ip->protocol;
+
+ /* ICMP has no ports, skip */
+ if (t->proto == IPPROTO_ICMP)
+ return 0;
+
+ /* follow-up fragments don't contain ports, skip all fragments */
+ if (ip_is_fragment(ip))
+ return 0;
+
+ hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
+
+ return 0;
+}
+
+static unsigned int
+hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_hmark_info *info = par->targinfo;
+ struct hmark_tuple t;
+
+ memset(&t, 0, sizeof(struct hmark_tuple));
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
+ if (hmark_ct_set_htuple(skb, &t, info) < 0)
+ return XT_CONTINUE;
+ } else {
+ if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
+ return XT_CONTINUE;
+ }
+
+ skb->mark = hmark_hash(&t, info);
+ return XT_CONTINUE;
+}
+
+static int hmark_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_hmark_info *info = par->targinfo;
+ const char *errmsg = "proto mask must be zero with L3 mode";
+
+ if (!info->hmodulus)
+ return -EINVAL;
+
+ if (info->proto_mask &&
+ (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
+ goto err;
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
+ (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
+ XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
+ return -EINVAL;
+
+ if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
+ (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
+ XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
+ errmsg = "spi-set and port-set can't be combined";
+ goto err;
+ }
+ return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
+}
+
+static struct xt_target hmark_tg_reg[] __read_mostly = {
+ {
+ .name = "HMARK",
+ .family = NFPROTO_IPV4,
+ .target = hmark_tg_v4,
+ .targetsize = sizeof(struct xt_hmark_info),
+ .checkentry = hmark_tg_check,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "HMARK",
+ .family = NFPROTO_IPV6,
+ .target = hmark_tg_v6,
+ .targetsize = sizeof(struct xt_hmark_info),
+ .checkentry = hmark_tg_check,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init hmark_tg_init(void)
+{
+ return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
+}
+
+static void __exit hmark_tg_exit(void)
+{
+ xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
+}
+
+module_init(hmark_tg_init);
+module_exit(hmark_tg_exit);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 000000000..0f8bb0bf5
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/net/netfilter/xt_IDLETIMER.c
+ *
+ * Netfilter module to trigger a timer when a packet matches.
+ * After the timer expires, a kevent will be sent.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and reworked for upstream inclusion
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/alarmtimer.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kdev_t.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+struct idletimer_tg {
+ struct list_head entry;
+ struct alarm alarm;
+ struct timer_list timer;
+ struct work_struct work;
+
+ struct kobject *kobj;
+ struct device_attribute attr;
+
+ unsigned int refcnt;
+ u8 timer_type;
+};
+
+static LIST_HEAD(idletimer_tg_list);
+static DEFINE_MUTEX(list_mutex);
+
+static struct kobject *idletimer_tg_kobj;
+
+static
+struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
+{
+ struct idletimer_tg *entry;
+
+ list_for_each_entry(entry, &idletimer_tg_list, entry) {
+ if (!strcmp(label, entry->attr.attr.name))
+ return entry;
+ }
+
+ return NULL;
+}
+
+static ssize_t idletimer_tg_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idletimer_tg *timer;
+ unsigned long expires = 0;
+ struct timespec64 ktimespec = {};
+ long time_diff = 0;
+
+ mutex_lock(&list_mutex);
+
+ timer = __idletimer_tg_find_by_label(attr->attr.name);
+ if (timer) {
+ if (timer->timer_type & XT_IDLETIMER_ALARM) {
+ ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm);
+ ktimespec = ktime_to_timespec64(expires_alarm);
+ time_diff = ktimespec.tv_sec;
+ } else {
+ expires = timer->timer.expires;
+ time_diff = jiffies_to_msecs(expires - jiffies) / 1000;
+ }
+ }
+
+ mutex_unlock(&list_mutex);
+
+ if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
+ return sysfs_emit(buf, "%ld\n", time_diff);
+
+ return sysfs_emit(buf, "0\n");
+}
+
+static void idletimer_tg_work(struct work_struct *work)
+{
+ struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
+ work);
+
+ sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+}
+
+static void idletimer_tg_expired(struct timer_list *t)
+{
+ struct idletimer_tg *timer = from_timer(timer, t, timer);
+
+ pr_debug("timer %s expired\n", timer->attr.attr.name);
+
+ schedule_work(&timer->work);
+}
+
+static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
+ ktime_t now)
+{
+ struct idletimer_tg *timer = alarm->data;
+
+ pr_debug("alarm %s expired\n", timer->attr.attr.name);
+ schedule_work(&timer->work);
+ return ALARMTIMER_NORESTART;
+}
+
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+ int ret;
+
+ ret = xt_check_proc_name(name, size);
+ if (ret < 0)
+ return ret;
+
+ if (!strcmp(name, "power") ||
+ !strcmp(name, "subsystem") ||
+ !strcmp(name, "uevent"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int idletimer_tg_create(struct idletimer_tg_info *info)
+{
+ int ret;
+
+ info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL);
+ if (!info->timer) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+ if (ret < 0)
+ goto out_free_timer;
+
+ sysfs_attr_init(&info->timer->attr.attr);
+ info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+ if (!info->timer->attr.attr.name) {
+ ret = -ENOMEM;
+ goto out_free_timer;
+ }
+ info->timer->attr.attr.mode = 0444;
+ info->timer->attr.show = idletimer_tg_show;
+
+ ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ if (ret < 0) {
+ pr_debug("couldn't add file to sysfs");
+ goto out_free_attr;
+ }
+
+ list_add(&info->timer->entry, &idletimer_tg_list);
+
+ timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
+ info->timer->refcnt = 1;
+
+ INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+ return 0;
+
+out_free_attr:
+ kfree(info->timer->attr.attr.name);
+out_free_timer:
+ kfree(info->timer);
+out:
+ return ret;
+}
+
+static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info)
+{
+ int ret;
+
+ info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+ if (!info->timer) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+ if (ret < 0)
+ goto out_free_timer;
+
+ sysfs_attr_init(&info->timer->attr.attr);
+ info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+ if (!info->timer->attr.attr.name) {
+ ret = -ENOMEM;
+ goto out_free_timer;
+ }
+ info->timer->attr.attr.mode = 0444;
+ info->timer->attr.show = idletimer_tg_show;
+
+ ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ if (ret < 0) {
+ pr_debug("couldn't add file to sysfs");
+ goto out_free_attr;
+ }
+
+ /* notify userspace */
+ kobject_uevent(idletimer_tg_kobj, KOBJ_ADD);
+
+ list_add(&info->timer->entry, &idletimer_tg_list);
+ pr_debug("timer type value is %u", info->timer_type);
+ info->timer->timer_type = info->timer_type;
+ info->timer->refcnt = 1;
+
+ INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+ if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+ ktime_t tout;
+ alarm_init(&info->timer->alarm, ALARM_BOOTTIME,
+ idletimer_tg_alarmproc);
+ info->timer->alarm.data = info->timer;
+ tout = ktime_set(info->timeout, 0);
+ alarm_start_relative(&info->timer->alarm, tout);
+ } else {
+ timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+ }
+
+ return 0;
+
+out_free_attr:
+ kfree(info->timer->attr.attr.name);
+out_free_timer:
+ kfree(info->timer);
+out:
+ return ret;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target(struct sk_buff *skb,
+ const struct xt_action_param *par)
+{
+ const struct idletimer_tg_info *info = par->targinfo;
+
+ pr_debug("resetting timer %s, timeout period %u\n",
+ info->label, info->timeout);
+
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+ return XT_CONTINUE;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target_v1(struct sk_buff *skb,
+ const struct xt_action_param *par)
+{
+ const struct idletimer_tg_info_v1 *info = par->targinfo;
+
+ pr_debug("resetting timer %s, timeout period %u\n",
+ info->label, info->timeout);
+
+ if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+ ktime_t tout = ktime_set(info->timeout, 0);
+ alarm_start_relative(&info->timer->alarm, tout);
+ } else {
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+ }
+
+ return XT_CONTINUE;
+}
+
+static int idletimer_tg_helper(struct idletimer_tg_info *info)
+{
+ if (info->timeout == 0) {
+ pr_debug("timeout value is zero\n");
+ return -EINVAL;
+ }
+ if (info->timeout >= INT_MAX / 1000) {
+ pr_debug("timeout value is too big\n");
+ return -EINVAL;
+ }
+ if (info->label[0] == '\0' ||
+ strnlen(info->label,
+ MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
+ pr_debug("label is empty or not nul-terminated\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+
+static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+{
+ struct idletimer_tg_info *info = par->targinfo;
+ int ret;
+
+ pr_debug("checkentry targinfo%s\n", info->label);
+
+ ret = idletimer_tg_helper(info);
+ if(ret < 0)
+ {
+ pr_debug("checkentry helper return invalid\n");
+ return -EINVAL;
+ }
+ mutex_lock(&list_mutex);
+
+ info->timer = __idletimer_tg_find_by_label(info->label);
+ if (info->timer) {
+ info->timer->refcnt++;
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+ pr_debug("increased refcnt of timer %s to %u\n",
+ info->label, info->timer->refcnt);
+ } else {
+ ret = idletimer_tg_create(info);
+ if (ret < 0) {
+ pr_debug("failed to create timer\n");
+ mutex_unlock(&list_mutex);
+ return ret;
+ }
+ }
+
+ mutex_unlock(&list_mutex);
+ return 0;
+}
+
+static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par)
+{
+ struct idletimer_tg_info_v1 *info = par->targinfo;
+ int ret;
+
+ pr_debug("checkentry targinfo%s\n", info->label);
+
+ if (info->send_nl_msg)
+ return -EOPNOTSUPP;
+
+ ret = idletimer_tg_helper((struct idletimer_tg_info *)info);
+ if (ret < 0) {
+ pr_debug("checkentry helper returned an error\n");
+ return -EINVAL;
+ }
+
+ if (info->timer_type > XT_IDLETIMER_ALARM) {
+ pr_debug("invalid value for timer type\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&list_mutex);
+
+ info->timer = __idletimer_tg_find_by_label(info->label);
+ if (info->timer) {
+ if (info->timer->timer_type != info->timer_type) {
+ pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n");
+ mutex_unlock(&list_mutex);
+ return -EINVAL;
+ }
+
+ info->timer->refcnt++;
+ if (info->timer_type & XT_IDLETIMER_ALARM) {
+ /* calculate remaining expiry time */
+ ktime_t tout = alarm_expires_remaining(&info->timer->alarm);
+ struct timespec64 ktimespec = ktime_to_timespec64(tout);
+
+ if (ktimespec.tv_sec > 0) {
+ pr_debug("time_expiry_remaining %lld\n",
+ ktimespec.tv_sec);
+ alarm_start_relative(&info->timer->alarm, tout);
+ }
+ } else {
+ mod_timer(&info->timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + jiffies);
+ }
+ pr_debug("increased refcnt of timer %s to %u\n",
+ info->label, info->timer->refcnt);
+ } else {
+ ret = idletimer_tg_create_v1(info);
+ if (ret < 0) {
+ pr_debug("failed to create timer\n");
+ mutex_unlock(&list_mutex);
+ return ret;
+ }
+ }
+
+ mutex_unlock(&list_mutex);
+ return 0;
+}
+
+static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct idletimer_tg_info *info = par->targinfo;
+
+ pr_debug("destroy targinfo %s\n", info->label);
+
+ mutex_lock(&list_mutex);
+
+ if (--info->timer->refcnt == 0) {
+ pr_debug("deleting timer %s\n", info->label);
+
+ list_del(&info->timer->entry);
+ del_timer_sync(&info->timer->timer);
+ cancel_work_sync(&info->timer->work);
+ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ kfree(info->timer->attr.attr.name);
+ kfree(info->timer);
+ } else {
+ pr_debug("decreased refcnt of timer %s to %u\n",
+ info->label, info->timer->refcnt);
+ }
+
+ mutex_unlock(&list_mutex);
+}
+
+static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
+{
+ const struct idletimer_tg_info_v1 *info = par->targinfo;
+
+ pr_debug("destroy targinfo %s\n", info->label);
+
+ mutex_lock(&list_mutex);
+
+ if (--info->timer->refcnt == 0) {
+ pr_debug("deleting timer %s\n", info->label);
+
+ list_del(&info->timer->entry);
+ if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+ alarm_cancel(&info->timer->alarm);
+ } else {
+ del_timer_sync(&info->timer->timer);
+ }
+ cancel_work_sync(&info->timer->work);
+ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ kfree(info->timer->attr.attr.name);
+ kfree(info->timer);
+ } else {
+ pr_debug("decreased refcnt of timer %s to %u\n",
+ info->label, info->timer->refcnt);
+ }
+
+ mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg[] __read_mostly = {
+ {
+ .name = "IDLETIMER",
+ .family = NFPROTO_UNSPEC,
+ .target = idletimer_tg_target,
+ .targetsize = sizeof(struct idletimer_tg_info),
+ .usersize = offsetof(struct idletimer_tg_info, timer),
+ .checkentry = idletimer_tg_checkentry,
+ .destroy = idletimer_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "IDLETIMER",
+ .family = NFPROTO_UNSPEC,
+ .revision = 1,
+ .target = idletimer_tg_target_v1,
+ .targetsize = sizeof(struct idletimer_tg_info_v1),
+ .usersize = offsetof(struct idletimer_tg_info_v1, timer),
+ .checkentry = idletimer_tg_checkentry_v1,
+ .destroy = idletimer_tg_destroy_v1,
+ .me = THIS_MODULE,
+ },
+};
+
+static struct class *idletimer_tg_class;
+
+static struct device *idletimer_tg_device;
+
+static int __init idletimer_tg_init(void)
+{
+ int err;
+
+ idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
+ err = PTR_ERR(idletimer_tg_class);
+ if (IS_ERR(idletimer_tg_class)) {
+ pr_debug("couldn't register device class\n");
+ goto out;
+ }
+
+ idletimer_tg_device = device_create(idletimer_tg_class, NULL,
+ MKDEV(0, 0), NULL, "timers");
+ err = PTR_ERR(idletimer_tg_device);
+ if (IS_ERR(idletimer_tg_device)) {
+ pr_debug("couldn't register system device\n");
+ goto out_class;
+ }
+
+ idletimer_tg_kobj = &idletimer_tg_device->kobj;
+
+ err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
+
+ if (err < 0) {
+ pr_debug("couldn't register xt target\n");
+ goto out_dev;
+ }
+
+ return 0;
+out_dev:
+ device_destroy(idletimer_tg_class, MKDEV(0, 0));
+out_class:
+ class_destroy(idletimer_tg_class);
+out:
+ return err;
+}
+
+static void __exit idletimer_tg_exit(void)
+{
+ xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
+
+ device_destroy(idletimer_tg_class, MKDEV(0, 0));
+ class_destroy(idletimer_tg_class);
+}
+
+module_init(idletimer_tg_init);
+module_exit(idletimer_tg_exit);
+
+MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: idle time monitor");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
new file mode 100644
index 000000000..0371c387b
--- /dev/null
+++ b/net/netfilter/xt_LED.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_LED.c - netfilter target to make LEDs blink upon packet matches
+ *
+ * Copyright (C) 2008 Adam Nielsen <a.nielsen@shikadi.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+#include <linux/mutex.h>
+
+#include <linux/netfilter/xt_LED.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
+MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
+
+static LIST_HEAD(xt_led_triggers);
+static DEFINE_MUTEX(xt_led_mutex);
+
+/*
+ * This structure is declared only here, in the kernel module, to avoid
+ * pulling its dependencies into userspace code. It is what
+ * xt_led_info.internal_data points to.
+ */
+struct xt_led_info_internal {
+ struct list_head list;
+ int refcnt;
+ char *trigger_id;
+ struct led_trigger netfilter_led_trigger;
+ struct timer_list timer;
+};
+
+#define XT_LED_BLINK_DELAY 50 /* ms */
+
+static unsigned int
+led_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_led_info *ledinfo = par->targinfo;
+ struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+ unsigned long led_delay = XT_LED_BLINK_DELAY;
+
+ /*
+ * If "always blink" is enabled, and there's still some time until the
+ * LED will switch off, briefly switch it off now.
+ */
+ if ((ledinfo->delay > 0) && ledinfo->always_blink &&
+ timer_pending(&ledinternal->timer))
+ led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger,
+ &led_delay, &led_delay, 1);
+ else
+ led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
+
+ /* If there's a positive delay, start/update the timer */
+ if (ledinfo->delay > 0) {
+ mod_timer(&ledinternal->timer,
+ jiffies + msecs_to_jiffies(ledinfo->delay));
+
+ /* Otherwise if there was no delay given, blink as fast as possible */
+ } else if (ledinfo->delay == 0) {
+ led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
+ }
+
+ /* else the delay is negative, which means switch on and stay on */
+
+ return XT_CONTINUE;
+}
+
+static void led_timeout_callback(struct timer_list *t)
+{
+ struct xt_led_info_internal *ledinternal = from_timer(ledinternal, t,
+ timer);
+
+ led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
+}
+
+static struct xt_led_info_internal *led_trigger_lookup(const char *name)
+{
+ struct xt_led_info_internal *ledinternal;
+
+ list_for_each_entry(ledinternal, &xt_led_triggers, list) {
+ if (!strcmp(name, ledinternal->netfilter_led_trigger.name)) {
+ return ledinternal;
+ }
+ }
+ return NULL;
+}
+
+static int led_tg_check(const struct xt_tgchk_param *par)
+{
+ struct xt_led_info *ledinfo = par->targinfo;
+ struct xt_led_info_internal *ledinternal;
+ int err;
+
+ if (ledinfo->id[0] == '\0')
+ return -EINVAL;
+
+ mutex_lock(&xt_led_mutex);
+
+ ledinternal = led_trigger_lookup(ledinfo->id);
+ if (ledinternal) {
+ ledinternal->refcnt++;
+ goto out;
+ }
+
+ err = -ENOMEM;
+ ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
+ if (!ledinternal)
+ goto exit_mutex_only;
+
+ ledinternal->trigger_id = kstrdup(ledinfo->id, GFP_KERNEL);
+ if (!ledinternal->trigger_id)
+ goto exit_internal_alloc;
+
+ ledinternal->refcnt = 1;
+ ledinternal->netfilter_led_trigger.name = ledinternal->trigger_id;
+
+ err = led_trigger_register(&ledinternal->netfilter_led_trigger);
+ if (err) {
+ pr_info_ratelimited("Trigger name is already in use.\n");
+ goto exit_alloc;
+ }
+
+ /* Since the ledinternal timer can be shared between multiple targets,
+ * always set it up, even if the current target does not need it.
+ */
+ timer_setup(&ledinternal->timer, led_timeout_callback, 0);
+
+ list_add_tail(&ledinternal->list, &xt_led_triggers);
+
+out:
+ mutex_unlock(&xt_led_mutex);
+
+ ledinfo->internal_data = ledinternal;
+
+ return 0;
+
+exit_alloc:
+ kfree(ledinternal->trigger_id);
+
+exit_internal_alloc:
+ kfree(ledinternal);
+
+exit_mutex_only:
+ mutex_unlock(&xt_led_mutex);
+
+ return err;
+}
+
+static void led_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_led_info *ledinfo = par->targinfo;
+ struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+
+ mutex_lock(&xt_led_mutex);
+
+ if (--ledinternal->refcnt) {
+ mutex_unlock(&xt_led_mutex);
+ return;
+ }
+
+ list_del(&ledinternal->list);
+
+ del_timer_sync(&ledinternal->timer);
+
+ led_trigger_unregister(&ledinternal->netfilter_led_trigger);
+
+ mutex_unlock(&xt_led_mutex);
+
+ kfree(ledinternal->trigger_id);
+ kfree(ledinternal);
+}
+
+static struct xt_target led_tg_reg __read_mostly = {
+ .name = "LED",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = led_tg,
+ .targetsize = sizeof(struct xt_led_info),
+ .usersize = offsetof(struct xt_led_info, internal_data),
+ .checkentry = led_tg_check,
+ .destroy = led_tg_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init led_tg_init(void)
+{
+ return xt_register_target(&led_tg_reg);
+}
+
+static void __exit led_tg_exit(void)
+{
+ xt_unregister_target(&led_tg_reg);
+}
+
+module_init(led_tg_init);
+module_exit(led_tg_exit);
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
new file mode 100644
index 000000000..f39244f9c
--- /dev/null
+++ b/net/netfilter/xt_LOG.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is a module used for logging packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
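+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): log telnet attempts to the kernel log at warning
+ * level with a searchable prefix:
+ *
+ *   iptables -A INPUT -p tcp --dport 23 -j LOG \
+ *            --log-prefix "telnet attempt: " --log-level warning
+ */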
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int
+log_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_log_info *loginfo = par->targinfo;
+ struct net *net = xt_net(par);
+ struct nf_loginfo li;
+
+ li.type = NF_LOG_TYPE_LOG;
+ li.u.log.level = loginfo->level;
+ li.u.log.logflags = loginfo->logflags;
+
+ nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+ xt_out(par), &li, "%s", loginfo->prefix);
+ return XT_CONTINUE;
+}
+
+static int log_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_log_info *loginfo = par->targinfo;
+ int ret;
+
+ if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
+ return -EINVAL;
+
+ if (loginfo->level >= 8) {
+ pr_debug("level %u >= 8\n", loginfo->level);
+ return -EINVAL;
+ }
+
+ if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
+ pr_debug("prefix is not null-terminated\n");
+ return -EINVAL;
+ }
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nf_log_syslog");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ }
+
+ return ret;
+}
+
+static void log_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
+static struct xt_target log_tg_regs[] __read_mostly = {
+ {
+ .name = "LOG",
+ .family = NFPROTO_IPV4,
+ .target = log_tg,
+ .targetsize = sizeof(struct xt_log_info),
+ .checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "LOG",
+ .family = NFPROTO_IPV6,
+ .target = log_tg,
+ .targetsize = sizeof(struct xt_log_info),
+ .checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init log_tg_init(void)
+{
+ return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
+}
+
+static void __exit log_tg_exit(void)
+{
+ xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
+}
+
+module_init(log_tg_init);
+module_exit(log_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
+MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging");
+MODULE_ALIAS("ipt_LOG");
+MODULE_ALIAS("ip6t_LOG");
+MODULE_SOFTDEP("pre: nf_log_syslog");
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
new file mode 100644
index 000000000..eae05c178
--- /dev/null
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Masquerade. Simple mapping which alters range to a local IP address
+ (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ */
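+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): rewrite the source of all traffic leaving a
+ * dynamic-address uplink to that interface's current address:
+ *
+ *   iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE
+ */
+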
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_masquerade.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
+
+/* FIXME: Multiple targets. --RR */
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static unsigned int
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_nat_range2 range;
+ const struct nf_nat_ipv4_multi_range_compat *mr;
+
+ mr = par->targinfo;
+ range.flags = mr->range[0].flags;
+ range.min_proto = mr->range[0].min;
+ range.max_proto = mr->range[0].max;
+
+ return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+ xt_out(par));
+}
+
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS)
+ return -EINVAL;
+
+ return nf_ct_netns_get(par->net, par->family);
+}
+#endif
+
+static struct xt_target masquerade_tg_reg[] __read_mostly = {
+ {
+#if IS_ENABLED(CONFIG_IPV6)
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV6,
+ .target = masquerade_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg6_checkentry,
+ .destroy = masquerade_tg_destroy,
+ .me = THIS_MODULE,
+ }, {
+#endif
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check,
+ .destroy = masquerade_tg_destroy,
+ .me = THIS_MODULE,
+ }
+};
+
+static int __init masquerade_tg_init(void)
+{
+ int ret;
+
+ ret = xt_register_targets(masquerade_tg_reg,
+ ARRAY_SIZE(masquerade_tg_reg));
+ if (ret)
+ return ret;
+
+ ret = nf_nat_masquerade_inet_register_notifiers();
+ if (ret) {
+ xt_unregister_targets(masquerade_tg_reg,
+ ARRAY_SIZE(masquerade_tg_reg));
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit masquerade_tg_exit(void)
+{
+ xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg));
+ nf_nat_masquerade_inet_unregister_notifiers();
+}
+
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS("ip6t_MASQUERADE");
+#endif
+MODULE_ALIAS("ipt_MASQUERADE");
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
new file mode 100644
index 000000000..cb2ee80d8
--- /dev/null
+++ b/net/netfilter/xt_NETMAP.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ */
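+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): statically map one /24 onto another while
+ * preserving the host part of each address:
+ *
+ *   iptables -t nat -A PREROUTING -d 1.2.3.0/24 -j NETMAP --to 5.6.7.0/24
+ */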
+
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+
+static unsigned int
+netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+ struct nf_nat_range2 newrange;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ union nf_inet_addr new_addr, netmask;
+ unsigned int i;
+
+ ct = nf_ct_get(skb, &ctinfo);
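+ /* netmask gets 1-bits where min_addr and max_addr agree (the network
+ * part of the mapping); those bits are taken from the range below,
+ * while the differing host bits are kept from the packet's address.
+ */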
+ for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++)
+ netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
+ range->max_addr.ip6[i]);
+
+ if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+ xt_hooknum(par) == NF_INET_LOCAL_OUT)
+ new_addr.in6 = ipv6_hdr(skb)->daddr;
+ else
+ new_addr.in6 = ipv6_hdr(skb)->saddr;
+
+ for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) {
+ new_addr.ip6[i] &= ~netmask.ip6[i];
+ new_addr.ip6[i] |= range->min_addr.ip6[i] &
+ netmask.ip6[i];
+ }
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr = new_addr;
+ newrange.max_addr = new_addr;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
+}
+
+static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+
+ if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
+ return -EINVAL;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static unsigned int
+netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ __be32 new_ip, netmask;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range2 newrange;
+
+ WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+ xt_hooknum(par) != NF_INET_POST_ROUTING &&
+ xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+ xt_hooknum(par) != NF_INET_LOCAL_IN);
+ ct = nf_ct_get(skb, &ctinfo);
+
+ netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
+
+ if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+ xt_hooknum(par) == NF_INET_LOCAL_OUT)
+ new_ip = ip_hdr(skb)->daddr & ~netmask;
+ else
+ new_ip = ip_hdr(skb)->saddr & ~netmask;
+ new_ip |= mr->range[0].min_ip & netmask;
+
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = new_ip;
+ newrange.max_addr.ip = new_ip;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
+}
+
+static int netmap_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u.\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static struct xt_target netmap_tg_reg[] __read_mostly = {
+ {
+ .name = "NETMAP",
+ .family = NFPROTO_IPV6,
+ .revision = 0,
+ .target = netmap_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .checkentry = netmap_tg6_checkentry,
+ .destroy = netmap_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NETMAP",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .target = netmap_tg4,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .checkentry = netmap_tg4_check,
+ .destroy = netmap_tg_destroy,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init netmap_tg_init(void)
+{
+ return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg));
+}
+
+static void __exit netmap_tg_exit(void)
+{
+ xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg));
+}
+
+module_init(netmap_tg_init);
+module_exit(netmap_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_NETMAP");
+MODULE_ALIAS("ipt_NETMAP");
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
new file mode 100644
index 000000000..e660c3710
--- /dev/null
+++ b/net/netfilter/xt_NFLOG.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ */
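+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): copy matching packets to netlink group 30 for a
+ * userspace listener such as ulogd:
+ *
+ *   iptables -A FORWARD -p tcp --dport 80 -j NFLOG \
+ *            --nflog-group 30 --nflog-prefix "http: "
+ */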
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFLOG.h>
+#include <net/netfilter/nf_log.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFLOG");
+MODULE_ALIAS("ip6t_NFLOG");
+
+static unsigned int
+nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_nflog_info *info = par->targinfo;
+ struct net *net = xt_net(par);
+ struct nf_loginfo li;
+
+ li.type = NF_LOG_TYPE_ULOG;
+ li.u.ulog.copy_len = info->len;
+ li.u.ulog.group = info->group;
+ li.u.ulog.qthreshold = info->threshold;
+ li.u.ulog.flags = 0;
+
+ if (info->flags & XT_NFLOG_F_COPY_LEN)
+ li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
+
+ nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+ xt_out(par), &li, "%s", info->prefix);
+
+ return XT_CONTINUE;
+}
+
+static int nflog_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_nflog_info *info = par->targinfo;
+ int ret;
+
+ if (info->flags & ~XT_NFLOG_MASK)
+ return -EINVAL;
+ if (info->prefix[sizeof(info->prefix) - 1] != '\0')
+ return -EINVAL;
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nfnetlink_log");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ }
+
+ return ret;
+}
+
+static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
+}
+
+static struct xt_target nflog_tg_reg __read_mostly = {
+ .name = "NFLOG",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nflog_tg_check,
+ .destroy = nflog_tg_destroy,
+ .target = nflog_tg,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+};
+
+static int __init nflog_tg_init(void)
+{
+ return xt_register_target(&nflog_tg_reg);
+}
+
+static void __exit nflog_tg_exit(void)
+{
+ xt_unregister_target(&nflog_tg_reg);
+}
+
+module_init(nflog_tg_init);
+module_exit(nflog_tg_exit);
+MODULE_SOFTDEP("pre: nfnetlink_log");
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
new file mode 100644
index 000000000..466da23e3
--- /dev/null
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* iptables module for using the new netfilter netlink queue
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ */
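+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): spread matching packets across queues 0-3, letting
+ * them pass when no userspace program is attached to the queue:
+ *
+ *   iptables -A FORWARD -j NFQUEUE --queue-balance 0:3 --queue-bypass
+ */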
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFQUEUE.h>
+
+#include <net/netfilter/nf_queue.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFQUEUE");
+MODULE_ALIAS("ip6t_NFQUEUE");
+MODULE_ALIAS("arpt_NFQUEUE");
+
+static u32 jhash_initval __read_mostly;
+
+static unsigned int
+nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info *tinfo = par->targinfo;
+
+ return NF_QUEUE_NR(tinfo->queuenum);
+}
+
+static unsigned int
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v1 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1) {
+ queue = nfqueue_hash(skb, queue, info->queues_total,
+ xt_family(par), jhash_initval);
+ }
+ return NF_QUEUE_NR(queue);
+}
+
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v2 *info = par->targinfo;
+ unsigned int ret = nfqueue_tg_v1(skb, par);
+
+ if (info->bypass)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+ return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
+ u32 maxid;
+
+ init_hashrandom(&jhash_initval);
+
+ if (info->queues_total == 0) {
+ pr_info_ratelimited("number of total queues is 0\n");
+ return -EINVAL;
+ }
+ maxid = info->queues_total - 1 + info->queuenum;
+ if (maxid > 0xffff) {
+ pr_info_ratelimited("number of queues (%u) out of range (got %u)\n",
+ info->queues_total, maxid);
+ return -ERANGE;
+ }
+ if (par->target->revision == 2 && info->flags > 1)
+ return -EINVAL;
+ if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned int
+nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
+ u32 queue = info->queuenum;
+ int ret;
+
+ if (info->queues_total > 1) {
+ if (info->flags & NFQ_FLAG_CPU_FANOUT) {
+ int cpu = smp_processor_id();
+
+ queue = info->queuenum + cpu % info->queues_total;
+ } else {
+ queue = nfqueue_hash(skb, queue, info->queues_total,
+ xt_family(par), jhash_initval);
+ }
+ }
+
+ ret = NF_QUEUE_NR(queue);
+ if (info->flags & NFQ_FLAG_BYPASS)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+ return ret;
+}
+
+static struct xt_target nfqueue_tg_reg[] __read_mostly = {
+ {
+ .name = "NFQUEUE",
+ .family = NFPROTO_UNSPEC,
+ .target = nfqueue_tg,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v1,
+ .targetsize = sizeof(struct xt_NFQ_info_v1),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v2,
+ .targetsize = sizeof(struct xt_NFQ_info_v2),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v3,
+ .targetsize = sizeof(struct xt_NFQ_info_v3),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init nfqueue_tg_init(void)
+{
+ return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
+}
+
+static void __exit nfqueue_tg_exit(void)
+{
+ xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
+}
+
+module_init(nfqueue_tg_init);
+module_exit(nfqueue_tg_exit);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
new file mode 100644
index 000000000..80f6624e2
--- /dev/null
+++ b/net/netfilter/xt_RATEEST.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ */
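+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): feed outgoing eth0 traffic into a named estimator
+ * that the xt_rateest match can later query:
+ *
+ *   iptables -t mangle -A POSTROUTING -o eth0 -j RATEEST \
+ *            --rateest-name eth0 --rateest-interval 250ms \
+ *            --rateest-ewmalog 0.5s
+ */
+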
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+#include <linux/jhash.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <net/gen_stats.h>
+#include <net/netlink.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_RATEEST.h>
+#include <net/netfilter/xt_rateest.h>
+
+#define RATEEST_HSIZE 16
+
+struct xt_rateest_net {
+ struct mutex hash_lock;
+ struct hlist_head hash[RATEEST_HSIZE];
+};
+
+static unsigned int xt_rateest_id;
+
+static unsigned int jhash_rnd __read_mostly;
+
+static unsigned int xt_rateest_hash(const char *name)
+{
+ return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) &
+ (RATEEST_HSIZE - 1);
+}
+
+static void xt_rateest_hash_insert(struct xt_rateest_net *xn,
+ struct xt_rateest *est)
+{
+ unsigned int h;
+
+ h = xt_rateest_hash(est->name);
+ hlist_add_head(&est->list, &xn->hash[h]);
+}
+
+static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn,
+ const char *name)
+{
+ struct xt_rateest *est;
+ unsigned int h;
+
+ h = xt_rateest_hash(name);
+ hlist_for_each_entry(est, &xn->hash[h], list) {
+ if (strcmp(est->name, name) == 0) {
+ est->refcnt++;
+ return est;
+ }
+ }
+
+ return NULL;
+}
+
+struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name)
+{
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+ struct xt_rateest *est;
+
+ mutex_lock(&xn->hash_lock);
+ est = __xt_rateest_lookup(xn, name);
+ mutex_unlock(&xn->hash_lock);
+ return est;
+}
+EXPORT_SYMBOL_GPL(xt_rateest_lookup);
+
+void xt_rateest_put(struct net *net, struct xt_rateest *est)
+{
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+
+ mutex_lock(&xn->hash_lock);
+ if (--est->refcnt == 0) {
+ hlist_del(&est->list);
+ gen_kill_estimator(&est->rate_est);
+ /*
+ * gen_estimator est_timer() might access est->lock or bstats,
+ * wait a RCU grace period before freeing 'est'
+ */
+ kfree_rcu(est, rcu);
+ }
+ mutex_unlock(&xn->hash_lock);
+}
+EXPORT_SYMBOL_GPL(xt_rateest_put);
+
+static unsigned int
+xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_rateest_target_info *info = par->targinfo;
+ struct gnet_stats_basic_sync *stats = &info->est->bstats;
+
+ spin_lock_bh(&info->est->lock);
+ u64_stats_add(&stats->bytes, skb->len);
+ u64_stats_inc(&stats->packets);
+ spin_unlock_bh(&info->est->lock);
+
+ return XT_CONTINUE;
+}
+
+static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
+{
+ struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id);
+ struct xt_rateest_target_info *info = par->targinfo;
+ struct xt_rateest *est;
+ struct {
+ struct nlattr opt;
+ struct gnet_estimator est;
+ } cfg;
+ int ret;
+
+ if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name))
+ return -ENAMETOOLONG;
+
+ net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
+
+ mutex_lock(&xn->hash_lock);
+ est = __xt_rateest_lookup(xn, info->name);
+ if (est) {
+ mutex_unlock(&xn->hash_lock);
+ /*
+ * If estimator parameters are specified, they must match the
+ * existing estimator.
+ */
+ if ((!info->interval && !info->ewma_log) ||
+ (info->interval != est->params.interval ||
+ info->ewma_log != est->params.ewma_log)) {
+ xt_rateest_put(par->net, est);
+ return -EINVAL;
+ }
+ info->est = est;
+ return 0;
+ }
+
+ ret = -ENOMEM;
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
+ if (!est)
+ goto err1;
+
+ gnet_stats_basic_sync_init(&est->bstats);
+ strscpy(est->name, info->name, sizeof(est->name));
+ spin_lock_init(&est->lock);
+ est->refcnt = 1;
+ est->params.interval = info->interval;
+ est->params.ewma_log = info->ewma_log;
+
+ cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est));
+ cfg.opt.nla_type = TCA_STATS_RATE_EST;
+ cfg.est.interval = info->interval;
+ cfg.est.ewma_log = info->ewma_log;
+
+ ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
+ &est->lock, NULL, &cfg.opt);
+ if (ret < 0)
+ goto err2;
+
+ info->est = est;
+ xt_rateest_hash_insert(xn, est);
+ mutex_unlock(&xn->hash_lock);
+ return 0;
+
+err2:
+ kfree(est);
+err1:
+ mutex_unlock(&xn->hash_lock);
+ return ret;
+}
+
+static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ struct xt_rateest_target_info *info = par->targinfo;
+
+ xt_rateest_put(par->net, info->est);
+}
+
+static struct xt_target xt_rateest_tg_reg __read_mostly = {
+ .name = "RATEEST",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = xt_rateest_tg,
+ .checkentry = xt_rateest_tg_checkentry,
+ .destroy = xt_rateest_tg_destroy,
+ .targetsize = sizeof(struct xt_rateest_target_info),
+ .usersize = offsetof(struct xt_rateest_target_info, est),
+ .me = THIS_MODULE,
+};
+
+static __net_init int xt_rateest_net_init(struct net *net)
+{
+ struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+ int i;
+
+ mutex_init(&xn->hash_lock);
+ for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
+ INIT_HLIST_HEAD(&xn->hash[i]);
+ return 0;
+}
+
+static struct pernet_operations xt_rateest_net_ops = {
+ .init = xt_rateest_net_init,
+ .id = &xt_rateest_id,
+ .size = sizeof(struct xt_rateest_net),
+};
+
+static int __init xt_rateest_tg_init(void)
+{
+ int err = register_pernet_subsys(&xt_rateest_net_ops);
+
+ if (err)
+ return err;
+ return xt_register_target(&xt_rateest_tg_reg);
+}
+
+static void __exit xt_rateest_tg_fini(void)
+{
+ xt_unregister_target(&xt_rateest_tg_reg);
+ unregister_pernet_subsys(&xt_rateest_net_ops);
+}
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: packet rate estimator");
+MODULE_ALIAS("ipt_RATEEST");
+MODULE_ALIAS("ip6t_RATEEST");
+module_init(xt_rateest_tg_init);
+module_exit(xt_rateest_tg_fini);
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
new file mode 100644
index 000000000..ff66b56a3
--- /dev/null
+++ b/net/netfilter/xt_REDIRECT.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
+ * NAT funded by Astaro.
+ */
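+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): divert incoming HTTP connections to a local
+ * transparent proxy on port 3128:
+ *
+ *   iptables -t nat -A PREROUTING -p tcp --dport 80 \
+ *            -j REDIRECT --to-ports 3128
+ */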
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/types.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/addrconf.h>
+#include <net/checksum.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
+
+static unsigned int
+redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
+}
+
+static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS)
+ return -EINVAL;
+
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static int redirect_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u.\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static unsigned int
+redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range2 range = {
+ .flags = mr->range[0].flags,
+ .min_proto = mr->range[0].min,
+ .max_proto = mr->range[0].max,
+ };
+
+ return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par));
+}
+
+static struct xt_target redirect_tg_reg[] __read_mostly = {
+ {
+ .name = "REDIRECT",
+ .family = NFPROTO_IPV6,
+ .revision = 0,
+ .table = "nat",
+ .checkentry = redirect_tg6_checkentry,
+ .destroy = redirect_tg_destroy,
+ .target = redirect_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "REDIRECT",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .table = "nat",
+ .target = redirect_tg4,
+ .checkentry = redirect_tg4_check,
+ .destroy = redirect_tg_destroy,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init redirect_tg_init(void)
+{
+ return xt_register_targets(redirect_tg_reg,
+ ARRAY_SIZE(redirect_tg_reg));
+}
+
+static void __exit redirect_tg_exit(void)
+{
+ xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg));
+}
+
+module_init(redirect_tg_init);
+module_exit(redirect_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
+MODULE_ALIAS("ip6t_REDIRECT");
+MODULE_ALIAS("ipt_REDIRECT");
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
new file mode 100644
index 000000000..498a0bf6f
--- /dev/null
+++ b/net/netfilter/xt_SECMARK.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Module for modifying the secmark field of the skb, for use by
+ * security subsystems.
+ *
+ * Based on the nfmark match by:
+ * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ */
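+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): label inbound SSH packets with an SELinux context
+ * so the security module can make per-packet policy decisions:
+ *
+ *   iptables -t security -A INPUT -p tcp --dport 22 -j SECMARK \
+ *            --selctx system_u:object_r:ssh_server_packet_t:s0
+ */
+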
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/security.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SECMARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
+MODULE_DESCRIPTION("Xtables: packet security mark modification");
+MODULE_ALIAS("ipt_SECMARK");
+MODULE_ALIAS("ip6t_SECMARK");
+
+static u8 mode;
+
+static unsigned int
+secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info)
+{
+ u32 secmark = 0;
+
+ switch (mode) {
+ case SECMARK_MODE_SEL:
+ secmark = info->secid;
+ break;
+ default:
+ BUG();
+ }
+
+ skb->secmark = secmark;
+ return XT_CONTINUE;
+}
+
+static int checkentry_lsm(struct xt_secmark_target_info_v1 *info)
+{
+ int err;
+
+ info->secctx[SECMARK_SECCTX_MAX - 1] = '\0';
+ info->secid = 0;
+
+ err = security_secctx_to_secid(info->secctx, strlen(info->secctx),
+ &info->secid);
+ if (err) {
+ if (err == -EINVAL)
+ pr_info_ratelimited("invalid security context \'%s\'\n",
+ info->secctx);
+ return err;
+ }
+
+ if (!info->secid) {
+ pr_info_ratelimited("unable to map security context \'%s\'\n",
+ info->secctx);
+ return -ENOENT;
+ }
+
+ err = security_secmark_relabel_packet(info->secid);
+ if (err) {
+ pr_info_ratelimited("unable to obtain relabeling permission\n");
+ return err;
+ }
+
+ security_secmark_refcount_inc();
+ return 0;
+}
+
+static int
+secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info)
+{
+ int err;
+
+ if (strcmp(table, "mangle") != 0 &&
+ strcmp(table, "security") != 0) {
+ pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
+ table);
+ return -EINVAL;
+ }
+
+ if (mode && mode != info->mode) {
+ pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n",
+ mode, info->mode);
+ return -EINVAL;
+ }
+
+ switch (info->mode) {
+ case SECMARK_MODE_SEL:
+ break;
+ default:
+ pr_info_ratelimited("invalid mode: %hu\n", info->mode);
+ return -EINVAL;
+ }
+
+ err = checkentry_lsm(info);
+ if (err)
+ return err;
+
+ if (!mode)
+ mode = info->mode;
+ return 0;
+}
+
+static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ switch (mode) {
+ case SECMARK_MODE_SEL:
+ security_secmark_refcount_dec();
+ }
+}
+
+static int secmark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+ struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .mode = info->mode,
+ };
+ int ret;
+
+ memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX);
+
+ ret = secmark_tg_check(par->table, &newinfo);
+ info->secid = newinfo.secid;
+
+ return ret;
+}
+
+static unsigned int
+secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .secid = info->secid,
+ };
+
+ return secmark_tg(skb, &newinfo);
+}
+
+static int secmark_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ return secmark_tg_check(par->table, par->targinfo);
+}
+
+static unsigned int
+secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return secmark_tg(skb, par->targinfo);
+}
+
+static struct xt_target secmark_tg_reg[] __read_mostly = {
+ {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v0,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v0,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v1,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v1,
+ .targetsize = sizeof(struct xt_secmark_target_info_v1),
+ .usersize = offsetof(struct xt_secmark_target_info_v1, secid),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init secmark_tg_init(void)
+{
+ return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+}
+
+static void __exit secmark_tg_exit(void)
+{
+ xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+}
+
+module_init(secmark_tg_init);
+module_exit(secmark_tg_exit);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
new file mode 100644
index 000000000..116a885ad
--- /dev/null
+++ b/net/netfilter/xt_TCPMSS.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is a module used for setting the MSS option in TCP packets.
+ *
+ * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
+ */
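+
+/*
+ * Illustrative usage (an assumption based on iptables-extensions(8), not part
+ * of the original source): clamp the MSS of forwarded SYN packets to the path
+ * MTU, the classic fix for PMTU blackholes behind PPPoE links:
+ *
+ *   iptables -t mangle -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+ *            -j TCPMSS --clamp-mss-to-pmtu
+ */
+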
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/gfp.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/ipv6.h>
+#include <net/route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_TCPMSS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
+MODULE_ALIAS("ipt_TCPMSS");
+MODULE_ALIAS("ip6t_TCPMSS");
+
+static inline unsigned int
+optlen(const u_int8_t *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+ return 1;
+ else
+ return opt[offset+1];
+}
+
+static u_int32_t tcpmss_reverse_mtu(struct net *net,
+ const struct sk_buff *skb,
+ unsigned int family)
+{
+ struct flowi fl;
+ struct rtable *rt = NULL;
+ u_int32_t mtu = ~0U;
+
+ if (family == PF_INET) {
+ struct flowi4 *fl4 = &fl.u.ip4;
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->daddr = ip_hdr(skb)->saddr;
+ } else {
+ struct flowi6 *fl6 = &fl.u.ip6;
+
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->daddr = ipv6_hdr(skb)->saddr;
+ }
+
+ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
+ if (rt != NULL) {
+ mtu = dst_mtu(&rt->dst);
+ dst_release(&rt->dst);
+ }
+ return mtu;
+}
+
+static int
+tcpmss_mangle_packet(struct sk_buff *skb,
+ const struct xt_action_param *par,
+ unsigned int family,
+ unsigned int tcphoff,
+ unsigned int minlen)
+{
+ const struct xt_tcpmss_info *info = par->targinfo;
+ struct tcphdr *tcph;
+ int len, tcp_hdrlen;
+ unsigned int i;
+ __be16 oldval;
+ u16 newmss;
+ u8 *opt;
+
+ /* This is a fragment, no TCP header is available */
+ if (par->fragoff != 0)
+ return 0;
+
+ if (skb_ensure_writable(skb, skb->len))
+ return -1;
+
+ len = skb->len - tcphoff;
+ if (len < (int)sizeof(struct tcphdr))
+ return -1;
+
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ tcp_hdrlen = tcph->doff * 4;
+
+ if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr))
+ return -1;
+
+ if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
+ struct net *net = xt_net(par);
+ unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+ unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
+
+ if (min_mtu <= minlen) {
+ net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
+ min_mtu);
+ return -1;
+ }
+ newmss = min_mtu - minlen;
+ } else
+ newmss = info->mss;
+
+ opt = (u_int8_t *)tcph;
+ for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
+ if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
+ u_int16_t oldmss;
+
+ oldmss = (opt[i+2] << 8) | opt[i+3];
+
+ /* Never increase MSS, even when setting it, as
+ * doing so results in problems for hosts that rely
+ * on MSS being set correctly.
+ */
+ if (oldmss <= newmss)
+ return 0;
+
+ opt[i+2] = (newmss & 0xff00) >> 8;
+ opt[i+3] = newmss & 0x00ff;
+
+ inet_proto_csum_replace2(&tcph->check, skb,
+ htons(oldmss), htons(newmss),
+ false);
+ return 0;
+ }
+ }
+
+ /* There is data after the header so the option can't be added
+ * without moving it, and doing so may make the SYN packet
+ * itself too large. Accept the packet unmodified instead.
+ */
+ if (len > tcp_hdrlen)
+ return 0;
+
+ /* tcph->doff has 4 bits, do not wrap it to 0 */
+ if (tcp_hdrlen >= 15 * 4)
+ return 0;
+
+ /*
+ * MSS option not found: add it.
+ */
+ if (skb_tailroom(skb) < TCPOLEN_MSS) {
+ if (pskb_expand_head(skb, 0,
+ TCPOLEN_MSS - skb_tailroom(skb),
+ GFP_ATOMIC))
+ return -1;
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ }
+
+ skb_put(skb, TCPOLEN_MSS);
+
+ /*
+ * IPv4: RFC 1122 states "If an MSS option is not received at
+ * connection setup, TCP MUST assume a default send MSS of 536".
+ * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum
+ * length IPv6 header of 60, ergo the default MSS value is 1220
+ * Since no MSS was provided, we must use the default values
+ */
+ if (xt_family(par) == NFPROTO_IPV4)
+ newmss = min(newmss, (u16)536);
+ else
+ newmss = min(newmss, (u16)1220);
+
+ opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
+ memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
+
+ inet_proto_csum_replace2(&tcph->check, skb,
+ htons(len), htons(len + TCPOLEN_MSS), true);
+ opt[0] = TCPOPT_MSS;
+ opt[1] = TCPOLEN_MSS;
+ opt[2] = (newmss & 0xff00) >> 8;
+ opt[3] = newmss & 0x00ff;
+
+ inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false);
+
+ oldval = ((__be16 *)tcph)[6];
+ tcph->doff += TCPOLEN_MSS/4;
+ inet_proto_csum_replace2(&tcph->check, skb,
+ oldval, ((__be16 *)tcph)[6], false);
+ return TCPOLEN_MSS;
+}
+
+static unsigned int
+tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ __be16 newlen;
+ int ret;
+
+ ret = tcpmss_mangle_packet(skb, par,
+ PF_INET,
+ iph->ihl * 4,
+ sizeof(*iph) + sizeof(struct tcphdr));
+ if (ret < 0)
+ return NF_DROP;
+ if (ret > 0) {
+ iph = ip_hdr(skb);
+ newlen = htons(ntohs(iph->tot_len) + ret);
+ csum_replace2(&iph->check, iph->tot_len, newlen);
+ iph->tot_len = newlen;
+ }
+ return XT_CONTINUE;
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static unsigned int
+tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ u8 nexthdr;
+ __be16 frag_off, oldlen, newlen;
+ int tcphoff;
+ int ret;
+
+ nexthdr = ipv6h->nexthdr;
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
+ if (tcphoff < 0)
+ return NF_DROP;
+ ret = tcpmss_mangle_packet(skb, par,
+ PF_INET6,
+ tcphoff,
+ sizeof(*ipv6h) + sizeof(struct tcphdr));
+ if (ret < 0)
+ return NF_DROP;
+ if (ret > 0) {
+ ipv6h = ipv6_hdr(skb);
+ oldlen = ipv6h->payload_len;
+ newlen = htons(ntohs(oldlen) + ret);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)oldlen),
+ (__force __wsum)newlen);
+ ipv6h->payload_len = newlen;
+ }
+ return XT_CONTINUE;
+}
+#endif
+
+/* Must specify -p tcp --syn */
+static inline bool find_syn_match(const struct xt_entry_match *m)
+{
+ const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
+
+ if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+ tcpinfo->flg_cmp & TCPHDR_SYN &&
+ !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
+ return true;
+
+ return false;
+}
+
+static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_tcpmss_info *info = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
+ const struct xt_entry_match *ematch;
+
+ if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+ (par->hook_mask & ~((1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING))) != 0) {
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return -EINVAL;
+ }
+ if (par->nft_compat)
+ return 0;
+
+ xt_ematch_foreach(ematch, e)
+ if (find_syn_match(ematch))
+ return 0;
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
+ return -EINVAL;
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_tcpmss_info *info = par->targinfo;
+ const struct ip6t_entry *e = par->entryinfo;
+ const struct xt_entry_match *ematch;
+
+ if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+ (par->hook_mask & ~((1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING))) != 0) {
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return -EINVAL;
+ }
+ if (par->nft_compat)
+ return 0;
+
+ xt_ematch_foreach(ematch, e)
+ if (find_syn_match(ematch))
+ return 0;
+ pr_info_ratelimited("Only works on TCP SYN packets\n");
+ return -EINVAL;
+}
+#endif
+
+static struct xt_target tcpmss_tg_reg[] __read_mostly = {
+ {
+ .family = NFPROTO_IPV4,
+ .name = "TCPMSS",
+ .checkentry = tcpmss_tg4_check,
+ .target = tcpmss_tg4,
+ .targetsize = sizeof(struct xt_tcpmss_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .family = NFPROTO_IPV6,
+ .name = "TCPMSS",
+ .checkentry = tcpmss_tg6_check,
+ .target = tcpmss_tg6,
+ .targetsize = sizeof(struct xt_tcpmss_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init tcpmss_tg_init(void)
+{
+ return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
+}
+
+static void __exit tcpmss_tg_exit(void)
+{
+ xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
+}
+
+module_init(tcpmss_tg_init);
+module_exit(tcpmss_tg_exit);
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
new file mode 100644
index 000000000..30e994641
--- /dev/null
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * A module for stripping a specific TCP option from TCP packets.
+ *
+ * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
+ * Copyright © CC Computer Consultants GmbH, 2007
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TCPOPTSTRIP.h>
+
+static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+ return 1;
+ else
+ return opt[offset+1];
+}
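
The guard above is what keeps the option walk in tcpoptstrip_mangle_packet()
from looping forever on a corrupt length byte. A standalone sketch of the same
TLV walk; the test data is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Same rule as optlen() above: kind 0 (EOL) and 1 (NOP) are single
 * bytes, and a corrupt zero length byte must still advance the walk. */
static unsigned int tcp_optlen(const uint8_t *opt, unsigned int off)
{
	if (opt[off] <= 1 || opt[off + 1] == 0)
		return 1;
	return opt[off + 1];
}

int main(void)
{
	/* NOP, NOP, MSS (kind 2, len 4, value 0x05b4), EOL */
	const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4, 0 };
	unsigned int i;

	for (i = 0; i < sizeof(opts); i += tcp_optlen(opts, i))
		printf("option kind %u at offset %u\n", opts[i], i);
	return 0;
}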
+
+static unsigned int
+tcpoptstrip_mangle_packet(struct sk_buff *skb,
+ const struct xt_action_param *par,
+ unsigned int tcphoff)
+{
+ const struct xt_tcpoptstrip_target_info *info = par->targinfo;
+ struct tcphdr *tcph, _th;
+ unsigned int optl, i, j;
+ u_int16_t n, o;
+ u_int8_t *opt;
+ int tcp_hdrlen;
+
+ /* This is a fragment, no TCP header is available */
+ if (par->fragoff != 0)
+ return XT_CONTINUE;
+
+ tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
+ if (!tcph)
+ return NF_DROP;
+
+ tcp_hdrlen = tcph->doff * 4;
+ if (tcp_hdrlen < sizeof(struct tcphdr))
+ return NF_DROP;
+
+ if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
+ return NF_DROP;
+
+ /* must reload tcph, might have been moved */
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ opt = (u8 *)tcph;
+
+ /*
+ * Walk through all TCP options - if we find some option to remove,
+ * set all octets to %TCPOPT_NOP and adjust checksum.
+ */
+ for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) {
+ optl = optlen(opt, i);
+
+ if (i + optl > tcp_hdrlen)
+ break;
+
+ if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
+ continue;
+
+ for (j = 0; j < optl; ++j) {
+ o = opt[i+j];
+ n = TCPOPT_NOP;
+ if ((i + j) % 2 == 0) {
+ o <<= 8;
+ n <<= 8;
+ }
+ inet_proto_csum_replace2(&tcph->check, skb, htons(o),
+ htons(n), false);
+ }
+ memset(opt + i, TCPOPT_NOP, optl);
+ }
+
+ return XT_CONTINUE;
+}
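
The (i + j) % 2 test above falls out of how the Internet checksum is defined:
it sums 16-bit big-endian words, so a byte at an even offset is the high-order
byte of its word. A minimal userspace sketch of that mapping; the offsets and
values are made up.

#include <stdint.h>
#include <stdio.h>

/* Build the old/new 16-bit checksum words implied by rewriting one
 * byte, mirroring the shift in the loop above: bytes at even offsets
 * occupy the high-order half of their 16-bit word. */
static void byte_to_words(unsigned int off, uint8_t oldb, uint8_t newb,
			  uint16_t *o, uint16_t *n)
{
	*o = oldb;
	*n = newb;
	if (off % 2 == 0) {
		*o <<= 8;
		*n <<= 8;
	}
}

int main(void)
{
	uint16_t o, n;

	/* Replace an MSS kind byte (2) with a NOP (1) at offset 20. */
	byte_to_words(20, 2, 1, &o, &n);
	printf("even offset: old word 0x%04x, new word 0x%04x\n", o, n);

	byte_to_words(21, 4, 1, &o, &n);
	printf("odd offset:  old word 0x%04x, new word 0x%04x\n", o, n);
	return 0;
}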
+
+static unsigned int
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+static unsigned int
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int tcphoff;
+ u_int8_t nexthdr;
+ __be16 frag_off;
+
+ nexthdr = ipv6h->nexthdr;
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
+ if (tcphoff < 0)
+ return NF_DROP;
+
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff);
+}
+#endif
+
+static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
+ {
+ .name = "TCPOPTSTRIP",
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .proto = IPPROTO_TCP,
+ .target = tcpoptstrip_tg4,
+ .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+ {
+ .name = "TCPOPTSTRIP",
+ .family = NFPROTO_IPV6,
+ .table = "mangle",
+ .proto = IPPROTO_TCP,
+ .target = tcpoptstrip_tg6,
+ .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init tcpoptstrip_tg_init(void)
+{
+ return xt_register_targets(tcpoptstrip_tg_reg,
+ ARRAY_SIZE(tcpoptstrip_tg_reg));
+}
+
+static void __exit tcpoptstrip_tg_exit(void)
+{
+ xt_unregister_targets(tcpoptstrip_tg_reg,
+ ARRAY_SIZE(tcpoptstrip_tg_reg));
+}
+
+module_init(tcpoptstrip_tg_init);
+module_exit(tcpoptstrip_tg_exit);
+MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: TCP option stripping");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TCPOPTSTRIP");
+MODULE_ALIAS("ip6t_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 000000000..a5ebd5640
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * "TEE" target extension for Xtables
+ * Copyright © Sebastian Claßen, 2007
+ * Jan Engelhardt, 2007-2010
+ *
+ * based on ipt_ROUTE.c from Cédric de Launois
+ * <delaunois@info.ucl.be>
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/route.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+#include <linux/netfilter/xt_TEE.h>
+
+struct xt_tee_priv {
+ struct list_head list;
+ struct xt_tee_tginfo *tginfo;
+ int oif;
+};
+
+static unsigned int tee_net_id __read_mostly;
+static const union nf_inet_addr tee_zero_address;
+
+struct tee_net {
+ struct list_head priv_list;
+ /* lock protects the priv_list */
+ struct mutex lock;
+};
+
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
+
+ nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif);
+
+ return XT_CONTINUE;
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
+
+ nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif);
+
+ return XT_CONTINUE;
+}
+#endif
+
+static int tee_netdev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct tee_net *tn = net_generic(net, tee_net_id);
+ struct xt_tee_priv *priv;
+
+ mutex_lock(&tn->lock);
+ list_for_each_entry(priv, &tn->priv_list, list) {
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, priv->tginfo->oif))
+ priv->oif = dev->ifindex;
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == priv->oif)
+ priv->oif = -1;
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, priv->tginfo->oif))
+ priv->oif = dev->ifindex;
+ else if (dev->ifindex == priv->oif)
+ priv->oif = -1;
+ break;
+ }
+ }
+ mutex_unlock(&tn->lock);
+
+ return NOTIFY_DONE;
+}
+
+static int tee_tg_check(const struct xt_tgchk_param *par)
+{
+ struct tee_net *tn = net_generic(par->net, tee_net_id);
+ struct xt_tee_tginfo *info = par->targinfo;
+ struct xt_tee_priv *priv;
+
+ /* 0.0.0.0 and :: not allowed */
+ if (memcmp(&info->gw, &tee_zero_address,
+ sizeof(tee_zero_address)) == 0)
+ return -EINVAL;
+
+ if (info->oif[0]) {
+ struct net_device *dev;
+
+ if (info->oif[sizeof(info->oif)-1] != '\0')
+ return -EINVAL;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv == NULL)
+ return -ENOMEM;
+
+ priv->tginfo = info;
+ priv->oif = -1;
+ info->priv = priv;
+
+ dev = dev_get_by_name(par->net, info->oif);
+ if (dev) {
+ priv->oif = dev->ifindex;
+ dev_put(dev);
+ }
+ mutex_lock(&tn->lock);
+ list_add(&priv->list, &tn->priv_list);
+ mutex_unlock(&tn->lock);
+	} else {
+		info->priv = NULL;
+	}
+
+ static_key_slow_inc(&xt_tee_enabled);
+ return 0;
+}
+
+static void tee_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ struct tee_net *tn = net_generic(par->net, tee_net_id);
+ struct xt_tee_tginfo *info = par->targinfo;
+
+ if (info->priv) {
+ mutex_lock(&tn->lock);
+ list_del(&info->priv->list);
+ mutex_unlock(&tn->lock);
+ kfree(info->priv);
+ }
+ static_key_slow_dec(&xt_tee_enabled);
+}
+
+static struct xt_target tee_tg_reg[] __read_mostly = {
+ {
+ .name = "TEE",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .target = tee_tg4,
+ .targetsize = sizeof(struct xt_tee_tginfo),
+ .usersize = offsetof(struct xt_tee_tginfo, priv),
+ .checkentry = tee_tg_check,
+ .destroy = tee_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "TEE",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .target = tee_tg6,
+ .targetsize = sizeof(struct xt_tee_tginfo),
+ .usersize = offsetof(struct xt_tee_tginfo, priv),
+ .checkentry = tee_tg_check,
+ .destroy = tee_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __net_init tee_net_init(struct net *net)
+{
+ struct tee_net *tn = net_generic(net, tee_net_id);
+
+ INIT_LIST_HEAD(&tn->priv_list);
+ mutex_init(&tn->lock);
+ return 0;
+}
+
+static struct pernet_operations tee_net_ops = {
+ .init = tee_net_init,
+ .id = &tee_net_id,
+ .size = sizeof(struct tee_net),
+};
+
+static struct notifier_block tee_netdev_notifier = {
+ .notifier_call = tee_netdev_event,
+};
+
+static int __init tee_tg_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&tee_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ if (ret < 0)
+ goto cleanup_subsys;
+
+ ret = register_netdevice_notifier(&tee_netdev_notifier);
+ if (ret < 0)
+ goto unregister_targets;
+
+ return 0;
+
+unregister_targets:
+ xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+cleanup_subsys:
+ unregister_pernet_subsys(&tee_net_ops);
+ return ret;
+}
+
+static void __exit tee_tg_exit(void)
+{
+ unregister_netdevice_notifier(&tee_netdev_notifier);
+ xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ unregister_pernet_subsys(&tee_net_ops);
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
new file mode 100644
index 000000000..e4bea1d34
--- /dev/null
+++ b/net/netfilter/xt_TPROXY.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2010 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#define XT_TPROXY_HAVE_IPV6 1
+#include <net/if_inet6.h>
+#include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+
+#include <net/netfilter/nf_tproxy.h>
+#include <linux/netfilter/xt_TPROXY.h>
+
+static unsigned int
+tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
+ u_int32_t mark_mask, u_int32_t mark_value)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct udphdr _hdr, *hp;
+ struct sock *sk;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return NF_DROP;
+
+	/* check if there's an ongoing connection on the packet
+	 * addresses; this happens if the redirect already happened
+	 * and the current packet belongs to an already established
+	 * connection */
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
+ iph->saddr, iph->daddr,
+ hp->source, hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
+
+ laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr);
+ if (!lport)
+ lport = hp->dest;
+
+ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+ if (sk && sk->sk_state == TCP_TIME_WAIT)
+ /* reopening a TIME_WAIT connection needs special handling */
+ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
+ else if (!sk)
+ /* no, there's no established connection, check if
+ * there's a listener on the redirected addr/port */
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
+ iph->saddr, laddr,
+ hp->source, lport,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+
+ /* NOTE: assign_sock consumes our sk reference */
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
+ /* This should be in a separate target, but we don't do multiple
+ targets on the same rule yet */
+ skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
+ nf_tproxy_assign_sock(skb, sk);
+ return NF_ACCEPT;
+ }
+
+ return NF_DROP;
+}
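
The two lookups above are TPROXY's core decision: prefer an established socket
for the packet's own tuple, otherwise fall back to a listener on the redirect
address/port, and accept only if the resulting socket is transparent. A reduced
sketch of that flow; all names are hypothetical, and each lookup is collapsed
to a boolean.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins: each nf_tproxy_* lookup is reduced to a
 * boolean describing what it would have found for this packet. */
struct flow {
	bool established;	/* ESTABLISHED lookup would hit */
	bool time_wait;		/* ...but the socket is in TIME_WAIT */
	bool listener;		/* LISTENER lookup would hit */
	bool transparent;	/* socket has IP_TRANSPARENT set */
};

static bool tproxy_decide(const struct flow *f)
{
	bool have_sock = f->established;

	if (have_sock && f->time_wait)
		/* reopening a TIME_WAIT connection: re-target to a
		 * listener, as nf_tproxy_handle_time_wait4() may do */
		have_sock = f->listener;
	else if (!have_sock)
		/* no established connection: try the redirect addr/port */
		have_sock = f->listener;

	return have_sock && f->transparent;
}

int main(void)
{
	struct flow fresh = { .listener = true, .transparent = true };
	struct flow stray = { 0 };

	printf("fresh SYN:    %s\n", tproxy_decide(&fresh) ? "NF_ACCEPT" : "NF_DROP");
	printf("stray packet: %s\n", tproxy_decide(&stray) ? "NF_ACCEPT" : "NF_DROP");
	return 0;
}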
+
+static unsigned int
+tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tproxy_target_info *tgi = par->targinfo;
+
+ return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
+ tgi->mark_mask, tgi->mark_value);
+}
+
+static unsigned int
+tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+
+ return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
+ tgi->mark_mask, tgi->mark_value);
+}
+
+#ifdef XT_TPROXY_HAVE_IPV6
+
+static unsigned int
+tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+ struct udphdr _hdr, *hp;
+ struct sock *sk;
+ const struct in6_addr *laddr;
+ __be16 lport;
+ int thoff = 0;
+ int tproto;
+
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0)
+ return NF_DROP;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ if (!hp)
+ return NF_DROP;
+
+	/* check if there's an ongoing connection on the packet
+	 * addresses; this happens if the redirect already happened
+	 * and the current packet belongs to an already established
+	 * connection */
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto,
+ &iph->saddr, &iph->daddr,
+ hp->source, hp->dest,
+ xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
+
+ laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
+ lport = tgi->lport ? tgi->lport : hp->dest;
+
+ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
+ if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ /* reopening a TIME_WAIT connection needs special handling */
+ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff,
+ xt_net(par),
+ &tgi->laddr.in6,
+ tgi->lport,
+ sk);
+	} else if (!sk)
+		/* no, there's no established connection, check if
+		 * there's a listener on the redirected addr/port */
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff,
+ tproto, &iph->saddr, laddr,
+ hp->source, lport,
+ xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
+
+ /* NOTE: assign_sock consumes our sk reference */
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
+ /* This should be in a separate target, but we don't do multiple
+ targets on the same rule yet */
+ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
+ nf_tproxy_assign_sock(skb, sk);
+ return NF_ACCEPT;
+ }
+
+ return NF_DROP;
+}
+
+static int tproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_ip6 *i = par->entryinfo;
+ int err;
+
+ err = nf_defrag_ipv6_enable(par->net);
+ if (err)
+ return err;
+
+ if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
+ !(i->invflags & IP6T_INV_PROTO))
+ return 0;
+
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
+ return -EINVAL;
+}
+
+static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_defrag_ipv6_disable(par->net);
+}
+#endif
+
+static int tproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_ip *i = par->entryinfo;
+ int err;
+
+ err = nf_defrag_ipv4_enable(par->net);
+ if (err)
+ return err;
+
+	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
+	    !(i->invflags & IPT_INV_PROTO))
+ return 0;
+
+ pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
+ return -EINVAL;
+}
+
+static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_defrag_ipv4_disable(par->net);
+}
+
+static struct xt_target tproxy_tg_reg[] __read_mostly = {
+ {
+ .name = "TPROXY",
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tproxy_tg4_v0,
+ .revision = 0,
+ .targetsize = sizeof(struct xt_tproxy_target_info),
+ .checkentry = tproxy_tg4_check,
+ .destroy = tproxy_tg4_destroy,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TPROXY",
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tproxy_tg4_v1,
+ .revision = 1,
+ .targetsize = sizeof(struct xt_tproxy_target_info_v1),
+ .checkentry = tproxy_tg4_check,
+ .destroy = tproxy_tg4_destroy,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
+#ifdef XT_TPROXY_HAVE_IPV6
+ {
+ .name = "TPROXY",
+ .family = NFPROTO_IPV6,
+ .table = "mangle",
+ .target = tproxy_tg6_v1,
+ .revision = 1,
+ .targetsize = sizeof(struct xt_tproxy_target_info_v1),
+ .checkentry = tproxy_tg6_check,
+ .destroy = tproxy_tg6_destroy,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init tproxy_tg_init(void)
+{
+ return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
+}
+
+static void __exit tproxy_tg_exit(void)
+{
+ xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
+}
+
+module_init(tproxy_tg_init);
+module_exit(tproxy_tg_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
+MODULE_ALIAS("ipt_TPROXY");
+MODULE_ALIAS("ip6t_TPROXY");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
new file mode 100644
index 000000000..5582dce98
--- /dev/null
+++ b/net/netfilter/xt_TRACE.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* This is a module which is used to mark packets for tracing.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+
+MODULE_DESCRIPTION("Xtables: packet flow tracing");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TRACE");
+MODULE_ALIAS("ip6t_TRACE");
+
+static int trace_tg_check(const struct xt_tgchk_param *par)
+{
+ return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void trace_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
+static unsigned int
+trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ skb->nf_trace = 1;
+ return XT_CONTINUE;
+}
+
+static struct xt_target trace_tg_reg __read_mostly = {
+ .name = "TRACE",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .table = "raw",
+ .target = trace_tg,
+ .checkentry = trace_tg_check,
+ .destroy = trace_tg_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init trace_tg_init(void)
+{
+ return xt_register_target(&trace_tg_reg);
+}
+
+static void __exit trace_tg_exit(void)
+{
+ xt_unregister_target(&trace_tg_reg);
+}
+
+module_init(trace_tg_init);
+module_exit(trace_tg_exit);
+MODULE_SOFTDEP("pre: nf_log_syslog");
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
new file mode 100644
index 000000000..e9b2181e8
--- /dev/null
+++ b/net/netfilter/xt_addrtype.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * iptables module to match inet_addr_type() of an ip.
+ *
+ * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/ip6_fib.h>
+#endif
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_addrtype.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: address type match");
+MODULE_ALIAS("ipt_addrtype");
+MODULE_ALIAS("ip6t_addrtype");
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
+ const struct in6_addr *addr, u16 mask)
+{
+ struct flowi6 flow;
+ struct rt6_info *rt;
+ u32 ret = 0;
+ int route_err;
+
+ memset(&flow, 0, sizeof(flow));
+ flow.daddr = *addr;
+ if (dev)
+ flow.flowi6_oif = dev->ifindex;
+
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+ if (nf_ipv6_chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
+ }
+
+ route_err = nf_ip6_route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
+ if (route_err)
+ return XT_ADDRTYPE_UNREACHABLE;
+
+ if (rt->rt6i_flags & RTF_REJECT)
+ ret = XT_ADDRTYPE_UNREACHABLE;
+
+ if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
+ ret |= XT_ADDRTYPE_LOCAL;
+ if (ipv6_anycast_destination((struct dst_entry *)rt, addr))
+ ret |= XT_ADDRTYPE_ANYCAST;
+
+ dst_release(&rt->dst);
+ return ret;
+}
+
+static bool match_type6(struct net *net, const struct net_device *dev,
+ const struct in6_addr *addr, u16 mask)
+{
+ int addr_type = ipv6_addr_type(addr);
+
+ if ((mask & XT_ADDRTYPE_MULTICAST) &&
+ !(addr_type & IPV6_ADDR_MULTICAST))
+ return false;
+ if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
+ return false;
+ if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
+ return false;
+
+ if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
+ XT_ADDRTYPE_UNREACHABLE) & mask)
+ return !!(mask & match_lookup_rt6(net, dev, addr, mask));
+ return true;
+}
+
+static bool
+addrtype_mt6(struct net *net, const struct net_device *dev,
+ const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool ret = true;
+
+ if (info->source)
+ ret &= match_type6(net, dev, &iph->saddr, info->source) ^
+ (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
+ if (ret && info->dest)
+ ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
+ !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
+ return ret;
+}
+#endif
+
+static inline bool match_type(struct net *net, const struct net_device *dev,
+ __be32 addr, u_int16_t mask)
+{
+ return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
+}
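
match_type() works because the uAPI header defines each XT_ADDRTYPE_* flag as
1 << RTN_*, so testing a route type against the configured mask is a single
shift and AND. A userspace sketch under that assumption (see
include/uapi/linux/netfilter/xt_addrtype.h); the names here are local stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the assumed uAPI convention XT_ADDRTYPE_x == 1 << RTN_x. */
enum { RTN_UNICAST = 1, RTN_LOCAL = 2, RTN_BROADCAST = 3 };
#define ADDRTYPE_UNICAST	(1 << RTN_UNICAST)
#define ADDRTYPE_LOCAL		(1 << RTN_LOCAL)

static bool match_type(uint16_t mask, int route_type)
{
	return mask & (1 << route_type);
}

int main(void)
{
	uint16_t mask = ADDRTYPE_UNICAST | ADDRTYPE_LOCAL;

	printf("unicast:   %d\n", match_type(mask, RTN_UNICAST));   /* 1 */
	printf("broadcast: %d\n", match_type(mask, RTN_BROADCAST)); /* 0 */
	return 0;
}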
+
+static bool
+addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct net *net = xt_net(par);
+ const struct xt_addrtype_info *info = par->matchinfo;
+ const struct iphdr *iph = ip_hdr(skb);
+ bool ret = true;
+
+ if (info->source)
+ ret &= match_type(net, NULL, iph->saddr, info->source) ^
+ info->invert_source;
+ if (info->dest)
+ ret &= match_type(net, NULL, iph->daddr, info->dest) ^
+ info->invert_dest;
+
+ return ret;
+}
+
+static bool
+addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct net *net = xt_net(par);
+ const struct xt_addrtype_info_v1 *info = par->matchinfo;
+ const struct iphdr *iph;
+ const struct net_device *dev = NULL;
+ bool ret = true;
+
+ if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
+ dev = xt_in(par);
+ else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+ dev = xt_out(par);
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ if (xt_family(par) == NFPROTO_IPV6)
+ return addrtype_mt6(net, dev, skb, info);
+#endif
+ iph = ip_hdr(skb);
+ if (info->source)
+ ret &= match_type(net, dev, iph->saddr, info->source) ^
+ (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
+ if (ret && info->dest)
+ ret &= match_type(net, dev, iph->daddr, info->dest) ^
+ !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
+ return ret;
+}
+
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+{
+	const char *errmsg = "incoming and outgoing interface limitations cannot both be selected";
+ struct xt_addrtype_info_v1 *info = par->matchinfo;
+
+ if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+ goto err;
+
+ if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN)) &&
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
+ errmsg = "output interface limitation not valid in PREROUTING and INPUT";
+ goto err;
+ }
+
+ if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT)) &&
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
+ errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT";
+ goto err;
+ }
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ if (par->family == NFPROTO_IPV6) {
+ if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
+ errmsg = "ipv6 BLACKHOLE matching not supported";
+ goto err;
+ }
+ if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
+ errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported";
+ goto err;
+ }
+ if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
+ errmsg = "ipv6 does not support BROADCAST matching";
+ goto err;
+ }
+ }
+#endif
+ return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
+}
+
+static struct xt_match addrtype_mt_reg[] __read_mostly = {
+ {
+ .name = "addrtype",
+ .family = NFPROTO_IPV4,
+ .match = addrtype_mt_v0,
+ .matchsize = sizeof(struct xt_addrtype_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "addrtype",
+ .family = NFPROTO_UNSPEC,
+ .revision = 1,
+ .match = addrtype_mt_v1,
+ .checkentry = addrtype_mt_checkentry_v1,
+ .matchsize = sizeof(struct xt_addrtype_info_v1),
+ .me = THIS_MODULE
+ }
+};
+
+static int __init addrtype_mt_init(void)
+{
+ return xt_register_matches(addrtype_mt_reg,
+ ARRAY_SIZE(addrtype_mt_reg));
+}
+
+static void __exit addrtype_mt_exit(void)
+{
+ xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
+}
+
+module_init(addrtype_mt_init);
+module_exit(addrtype_mt_exit);
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
new file mode 100644
index 000000000..849ac552a
--- /dev/null
+++ b/net/netfilter/xt_bpf.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Xtables module to match packets using a BPF filter.
+ * Copyright 2013 Google Inc.
+ * Written by Willem de Bruijn <willemb@google.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+#include <linux/netfilter/xt_bpf.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
+MODULE_DESCRIPTION("Xtables: BPF filter match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_bpf");
+MODULE_ALIAS("ip6t_bpf");
+
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+ struct bpf_prog **ret)
+{
+ struct sock_fprog_kern program;
+
+ if (len > XT_BPF_MAX_NUM_INSTR)
+ return -EINVAL;
+
+ program.len = len;
+ program.filter = insns;
+
+ if (bpf_prog_create(ret, &program)) {
+ pr_info_ratelimited("check failed: parse error\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
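
__bpf_mt_check_bytecode() takes a flat array of classic-BPF instructions. A
small userspace illustration of that format: a filter matching UDP only,
printed in the "len,code jt jf k,..." form that iptables -m bpf --bytecode
consumes. The offset-9 load assumes the filter sees the packet from the IPv4
header onward, as in the iptables-extensions manual's example.

#include <stdio.h>
#include <linux/filter.h>	/* struct sock_filter, BPF_STMT, BPF_JUMP */

int main(void)
{
	/* Match UDP only: byte 9 of the IPv4 header is the protocol. */
	struct sock_filter prog[] = {
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 9),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 17 /* UDP */, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),	/* match */
		BPF_STMT(BPF_RET | BPF_K, 0),		/* no match */
	};
	size_t i, n = sizeof(prog) / sizeof(prog[0]);

	printf("%zu", n);
	for (i = 0; i < n; i++)
		printf(",%u %u %u %u", prog[i].code, prog[i].jt,
		       prog[i].jf, prog[i].k);
	printf("\n");
	return 0;
}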
+
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ *ret = prog;
+ return 0;
+}
+
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+ if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+ return -EINVAL;
+
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info *info = par->matchinfo;
+
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ if (info->mode == XT_BPF_MODE_BYTECODE)
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+ else if (info->mode == XT_BPF_MODE_FD_ELF)
+ return __bpf_mt_check_fd(info->fd, &info->filter);
+ else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+ return __bpf_mt_check_path(info->path, &info->filter);
+ else
+ return -EINVAL;
+}
+
+static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_bpf_info *info = par->matchinfo;
+
+ return bpf_prog_run(info->filter, skb);
+}
+
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
+static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_bpf_info *info = par->matchinfo;
+
+ bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ bpf_prog_destroy(info->filter);
+}
+
+static struct xt_match bpf_mt_reg[] __read_mostly = {
+ {
+ .name = "bpf",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check,
+ .match = bpf_mt,
+ .destroy = bpf_mt_destroy,
+ .matchsize = sizeof(struct xt_bpf_info),
+ .usersize = offsetof(struct xt_bpf_info, filter),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "bpf",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check_v1,
+ .match = bpf_mt_v1,
+ .destroy = bpf_mt_destroy_v1,
+ .matchsize = sizeof(struct xt_bpf_info_v1),
+ .usersize = offsetof(struct xt_bpf_info_v1, filter),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init bpf_mt_init(void)
+{
+ return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
+}
+
+static void __exit bpf_mt_exit(void)
+{
+ xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
+}
+
+module_init(bpf_mt_init);
+module_exit(bpf_mt_exit);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
new file mode 100644
index 000000000..c0f5e9a4f
--- /dev/null
+++ b/net/netfilter/xt_cgroup.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Xtables module to match the process control group.
+ *
+ * Might be used to implement individual "per-application" firewall
+ * policies in contrast to global policies based on control groups.
+ * Matching is based upon processes tagged to net_cls' classid marker.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_cgroup.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("Xtables: process control group matching");
+MODULE_ALIAS("ipt_cgroup");
+MODULE_ALIAS("ip6t_cgroup");
+
+static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v0 *info = par->matchinfo;
+
+ if (info->invert & ~1)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+		pr_info("no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info_ratelimited("path and classid specified\n");
+ return -EINVAL;
+ }
+
+ info->priv = NULL;
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info_ratelimited("invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v2 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+		pr_info("no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info_ratelimited("path and classid specified\n");
+ return -EINVAL;
+ }
+
+ info->priv = NULL;
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info_ratelimited("invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
+static bool
+cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v0 *info = par->matchinfo;
+ struct sock *sk = skb->sk;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+ return false;
+
+ return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
+ info->invert;
+}
+
+static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+ struct sock *sk = skb->sk;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v2 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+ struct sock *sk = skb->sk;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
+static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
+}
+
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v2 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
+}
+
+static struct xt_match cgroup_mt_reg[] __read_mostly = {
+ {
+ .name = "cgroup",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v0,
+ .match = cgroup_mt_v0,
+ .matchsize = sizeof(struct xt_cgroup_info_v0),
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+ {
+ .name = "cgroup",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v1,
+ .match = cgroup_mt_v1,
+ .matchsize = sizeof(struct xt_cgroup_info_v1),
+ .usersize = offsetof(struct xt_cgroup_info_v1, priv),
+ .destroy = cgroup_mt_destroy_v1,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+ {
+ .name = "cgroup",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v2,
+ .match = cgroup_mt_v2,
+ .matchsize = sizeof(struct xt_cgroup_info_v2),
+ .usersize = offsetof(struct xt_cgroup_info_v2, priv),
+ .destroy = cgroup_mt_destroy_v2,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+};
+
+static int __init cgroup_mt_init(void)
+{
+ return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
+}
+
+static void __exit cgroup_mt_exit(void)
+{
+ xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
+}
+
+module_init(cgroup_mt_init);
+module_exit(cgroup_mt_exit);
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
new file mode 100644
index 000000000..a047a5453
--- /dev/null
+++ b/net/netfilter/xt_cluster.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/xt_cluster.h>
+
+static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct)
+{
+ return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+}
+
+static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
+{
+ return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info)
+{
+ return jhash_1word(ip, info->hash_seed);
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info)
+{
+ return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed);
+}
+
+static inline u_int32_t
+xt_cluster_hash(const struct nf_conn *ct,
+ const struct xt_cluster_match_info *info)
+{
+ u_int32_t hash = 0;
+
+	switch (nf_ct_l3num(ct)) {
+ case AF_INET:
+ hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
+ break;
+ case AF_INET6:
+ hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+
+ return reciprocal_scale(hash, info->total_nodes);
+}
+
+static inline bool
+xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
+{
+ bool is_multicast = false;
+
+	switch (family) {
+ case NFPROTO_IPV4:
+ is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr);
+ break;
+ case NFPROTO_IPV6:
+ is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ return is_multicast;
+}
+
+static bool
+xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct sk_buff *pskb = (struct sk_buff *)skb;
+ const struct xt_cluster_match_info *info = par->matchinfo;
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned long hash;
+
+	/* This match assumes that all nodes see the same packets. This can be
+	 * achieved if the switch that connects the cluster nodes supports some
+	 * sort of 'port mirroring'. However, if your switch does not support
+	 * this, your cluster nodes can reply to ARP requests using a multicast
+	 * MAC address. Thus, your switch will flood the same packets to the
+	 * cluster nodes with the same multicast MAC address. Using a multicast
+	 * link address is an RFC 1812 (section 3.3.2) violation, but this
+	 * works fine in practice.
+	 *
+	 * Unfortunately, if you use the multicast MAC address, the link layer
+	 * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted
+	 * by TCP and others for packets coming to this node. For that reason,
+	 * this match mangles skbuff's pkt_type if it detects a packet
+	 * addressed to a unicast address but using PACKET_MULTICAST. Yes, I
+	 * know, matches should not alter packets, but we are doing this here
+	 * because we would otherwise need to add a PKTTYPE target for this
+	 * sole purpose.
+	 */
+ if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) &&
+ skb->pkt_type == PACKET_MULTICAST) {
+ pskb->pkt_type = PACKET_HOST;
+ }
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return false;
+
+ if (ct->master)
+ hash = xt_cluster_hash(ct->master, info);
+ else
+ hash = xt_cluster_hash(ct, info);
+
+ return !!((1 << hash) & info->node_mask) ^
+ !!(info->flags & XT_CLUSTER_F_INV);
+}
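
xt_cluster_hash() scales the jhash of the original source address onto
[0, total_nodes) with reciprocal_scale(), and the match then tests the
corresponding bit in node_mask. A userspace sketch of that decision; the hash
value below is a made-up stand-in for the jhash output.

#include <stdint.h>
#include <stdio.h>

/* reciprocal_scale() maps a 32-bit hash onto [0, ep_ro) without a
 * modulo, as in include/linux/kernel.h. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

/* A node handles the flow iff the bit picked by the scaled hash is in
 * its node_mask; XT_CLUSTER_F_INV flips the answer. */
static int cluster_responsible(uint32_t hash, uint32_t total_nodes,
			       uint32_t node_mask, int invert)
{
	uint32_t slot = reciprocal_scale(hash, total_nodes);

	return !!((1u << slot) & node_mask) ^ !!invert;
}

int main(void)
{
	/* Two-node cluster: node A owns slot 0, node B owns slot 1.
	 * Exactly one node claims any given flow. */
	uint32_t flow = 0x9e3779b9;	/* pretend jhash output */

	printf("node A: %d\n", cluster_responsible(flow, 2, 1 << 0, 0));
	printf("node B: %d\n", cluster_responsible(flow, 2, 1 << 1, 0));
	return 0;
}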
+
+static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_cluster_match_info *info = par->matchinfo;
+ int ret;
+
+ if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
+ pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n",
+ info->total_nodes, XT_CLUSTER_NODES_MAX);
+ return -EINVAL;
+ }
+ if (info->node_mask >= (1ULL << info->total_nodes)) {
+ pr_info_ratelimited("node mask cannot exceed total number of nodes\n");
+ return -EDOM;
+ }
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match xt_cluster_match __read_mostly = {
+ .name = "cluster",
+ .family = NFPROTO_UNSPEC,
+ .match = xt_cluster_mt,
+ .checkentry = xt_cluster_mt_checkentry,
+ .matchsize = sizeof(struct xt_cluster_match_info),
+ .destroy = xt_cluster_mt_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init xt_cluster_mt_init(void)
+{
+ return xt_register_match(&xt_cluster_match);
+}
+
+static void __exit xt_cluster_mt_fini(void)
+{
+ xt_unregister_match(&xt_cluster_match);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: hash-based cluster match");
+MODULE_ALIAS("ipt_cluster");
+MODULE_ALIAS("ip6t_cluster");
+module_init(xt_cluster_mt_init);
+module_exit(xt_cluster_mt_fini);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
new file mode 100644
index 000000000..f095557e3
--- /dev/null
+++ b/net/netfilter/xt_comment.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implements a dummy match to allow attaching comments to rules
+ *
+ * 2003-05-13 Brad Fisher (brad@info-link.net)
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_comment.h>
+
+MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
+MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a comment");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_comment");
+MODULE_ALIAS("ip6t_comment");
+
+static bool
+comment_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ /* We always match */
+ return true;
+}
+
+static struct xt_match comment_mt_reg __read_mostly = {
+ .name = "comment",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = comment_mt,
+ .matchsize = sizeof(struct xt_comment_info),
+ .me = THIS_MODULE,
+};
+
+static int __init comment_mt_init(void)
+{
+ return xt_register_match(&comment_mt_reg);
+}
+
+static void __exit comment_mt_exit(void)
+{
+ xt_unregister_match(&comment_mt_reg);
+}
+
+module_init(comment_mt_init);
+module_exit(comment_mt_exit);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
new file mode 100644
index 000000000..93cb018c3
--- /dev/null
+++ b/net/netfilter/xt_connbytes.c
@@ -0,0 +1,157 @@
+/* Kernel module to match connection tracking byte counter.
+ * GPL (C) 2002 Martin Devera (devik@cdi.cz).
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/math64.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
+MODULE_ALIAS("ipt_connbytes");
+MODULE_ALIAS("ip6t_connbytes");
+
+static bool
+connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_connbytes_info *sinfo = par->matchinfo;
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int64_t what = 0; /* initialize to make gcc happy */
+ u_int64_t bytes = 0;
+ u_int64_t pkts = 0;
+ const struct nf_conn_acct *acct;
+ const struct nf_conn_counter *counters;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return false;
+
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
+ return false;
+
+ counters = acct->counter;
+ switch (sinfo->what) {
+ case XT_CONNBYTES_PKTS:
+ switch (sinfo->direction) {
+ case XT_CONNBYTES_DIR_ORIGINAL:
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
+ break;
+ case XT_CONNBYTES_DIR_REPLY:
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
+ break;
+ case XT_CONNBYTES_DIR_BOTH:
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
+ break;
+ }
+ break;
+ case XT_CONNBYTES_BYTES:
+ switch (sinfo->direction) {
+ case XT_CONNBYTES_DIR_ORIGINAL:
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ break;
+ case XT_CONNBYTES_DIR_REPLY:
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ break;
+ case XT_CONNBYTES_DIR_BOTH:
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ break;
+ }
+ break;
+ case XT_CONNBYTES_AVGPKT:
+ switch (sinfo->direction) {
+ case XT_CONNBYTES_DIR_ORIGINAL:
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
+ break;
+ case XT_CONNBYTES_DIR_REPLY:
+ bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
+ break;
+ case XT_CONNBYTES_DIR_BOTH:
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
+ break;
+ }
+ if (pkts != 0)
+ what = div64_u64(bytes, pkts);
+ break;
+ }
+
+ if (sinfo->count.to >= sinfo->count.from)
+ return what <= sinfo->count.to && what >= sinfo->count.from;
+ else /* inverted */
+ return what < sinfo->count.to || what > sinfo->count.from;
+}
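
The final comparison above supports two forms: from <= to selects the closed
interval [from, to], while from > to is the inverted form that matches
everything strictly outside it. A standalone sketch with made-up numbers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same range logic as connbytes_mt() above. */
static bool connbytes_in_range(uint64_t what, uint64_t from, uint64_t to)
{
	if (to >= from)
		return what <= to && what >= from;
	return what < to || what > from;	/* inverted */
}

int main(void)
{
	/* normal: match counts within [100, 200] */
	printf("%d\n", connbytes_in_range(150, 100, 200));	/* 1 */
	/* inverted (from=200, to=100): match < 100 or > 200 */
	printf("%d\n", connbytes_in_range(150, 200, 100));	/* 0 */
	printf("%d\n", connbytes_in_range(300, 200, 100));	/* 1 */
	return 0;
}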
+
+static int connbytes_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_connbytes_info *sinfo = par->matchinfo;
+ int ret;
+
+ if (sinfo->what != XT_CONNBYTES_PKTS &&
+ sinfo->what != XT_CONNBYTES_BYTES &&
+ sinfo->what != XT_CONNBYTES_AVGPKT)
+ return -EINVAL;
+
+ if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
+ sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
+ sinfo->direction != XT_CONNBYTES_DIR_BOTH)
+ return -EINVAL;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+
+	/*
+	 * This filter cannot function correctly unless connection tracking
+	 * accounting is enabled, so force it on and warn so that someone
+	 * notices.
+	 */
+ if (!nf_ct_acct_enabled(par->net)) {
+ pr_warn("Forcing CT accounting to be enabled\n");
+ nf_ct_set_acct(par->net, true);
+ }
+
+ return ret;
+}
+
+static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match connbytes_mt_reg __read_mostly = {
+ .name = "connbytes",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connbytes_mt_check,
+ .match = connbytes_mt,
+ .destroy = connbytes_mt_destroy,
+ .matchsize = sizeof(struct xt_connbytes_info),
+ .me = THIS_MODULE,
+};
+
+static int __init connbytes_mt_init(void)
+{
+ return xt_register_match(&connbytes_mt_reg);
+}
+
+static void __exit connbytes_mt_exit(void)
+{
+ xt_unregister_match(&connbytes_mt_reg);
+}
+
+module_init(connbytes_mt_init);
+module_exit(connbytes_mt_exit);
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
new file mode 100644
index 000000000..87505cdad
--- /dev/null
+++ b/net/netfilter/xt_connlabel.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2013 Astaro GmbH & Co KG
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: add/match connection tracking labels");
+MODULE_ALIAS("ipt_connlabel");
+MODULE_ALIAS("ip6t_connlabel");
+
+static bool
+connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_connlabel_mtinfo *info = par->matchinfo;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_labels *labels;
+ struct nf_conn *ct;
+ bool invert = info->options & XT_CONNLABEL_OP_INVERT;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return invert;
+
+ labels = nf_ct_labels_find(ct);
+ if (!labels)
+ return invert;
+
+ if (test_bit(info->bit, labels->bits))
+ return !invert;
+
+ if (info->options & XT_CONNLABEL_OP_SET) {
+ if (!test_and_set_bit(info->bit, labels->bits))
+ nf_conntrack_event_cache(IPCT_LABEL, ct);
+
+ return !invert;
+ }
+
+ return invert;
+}
+
+static int connlabel_mt_check(const struct xt_mtchk_param *par)
+{
+ const int options = XT_CONNLABEL_OP_INVERT |
+ XT_CONNLABEL_OP_SET;
+ struct xt_connlabel_mtinfo *info = par->matchinfo;
+ int ret;
+
+ if (info->options & ~options) {
+ pr_info_ratelimited("Unknown options in mask %x\n",
+ info->options);
+ return -EINVAL;
+ }
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+ }
+
+ ret = nf_connlabels_get(par->net, info->bit);
+ if (ret < 0)
+ nf_ct_netns_put(par->net, par->family);
+ return ret;
+}
+
+static void connlabel_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_connlabels_put(par->net);
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match connlabels_mt_reg __read_mostly = {
+ .name = "connlabel",
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connlabel_mt_check,
+ .match = connlabel_mt,
+ .matchsize = sizeof(struct xt_connlabel_mtinfo),
+ .destroy = connlabel_mt_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init connlabel_mt_init(void)
+{
+ return xt_register_match(&connlabels_mt_reg);
+}
+
+static void __exit connlabel_mt_exit(void)
+{
+ xt_unregister_match(&connlabels_mt_reg);
+}
+
+module_init(connlabel_mt_init);
+module_exit(connlabel_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
new file mode 100644
index 000000000..5d04ef80a
--- /dev/null
+++ b/net/netfilter/xt_connlimit.c
@@ -0,0 +1,137 @@
+/*
+ * netfilter module to limit the number of parallel tcp
+ * connections per IP address.
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ * (C) CC Computer Consultants GmbH, 2007
+ *
+ * based on ...
+ *
+ * Kernel module to match connection tracking information.
+ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connlimit.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_count.h>
+
+static bool
+connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct net *net = xt_net(par);
+ const struct xt_connlimit_info *info = par->matchinfo;
+ struct nf_conntrack_tuple tuple;
+ const struct nf_conntrack_tuple *tuple_ptr = &tuple;
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int connections;
+ u32 key[5];
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct != NULL) {
+ tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ zone = nf_ct_zone(ct);
+ } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ xt_family(par), net, &tuple)) {
+ goto hotdrop;
+ }
+
+ if (xt_family(par) == NFPROTO_IPV6) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ union nf_inet_addr addr;
+ unsigned int i;
+
+ memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+ &iph->daddr : &iph->saddr, sizeof(addr.ip6));
+
+ for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
+ addr.ip6[i] &= info->mask.ip6[i];
+ memcpy(key, &addr, sizeof(addr.ip6));
+ key[4] = zone->id;
+ } else {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
+ (__force __u32)iph->daddr : (__force __u32)iph->saddr;
+ key[0] &= (__force __u32)info->mask.ip;
+ key[1] = zone->id;
+ }
+
+ connections = nf_conncount_count(net, info->data, key, tuple_ptr,
+ zone);
+ if (connections == 0)
+ /* kmalloc failed, drop it entirely */
+ goto hotdrop;
+
+ return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
+
+ hotdrop:
+ par->hotdrop = true;
+ return false;
+}
+
+static int connlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_connlimit_info *info = par->matchinfo;
+ unsigned int keylen;
+
+ keylen = sizeof(u32);
+ if (par->family == NFPROTO_IPV6)
+ keylen += sizeof(struct in6_addr);
+ else
+ keylen += sizeof(struct in_addr);
+
+ /* init private data */
+ info->data = nf_conncount_init(par->net, par->family, keylen);
+
+ return PTR_ERR_OR_ZERO(info->data);
+}
+
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_connlimit_info *info = par->matchinfo;
+
+ nf_conncount_destroy(par->net, par->family, info->data);
+}
+
+static struct xt_match connlimit_mt_reg __read_mostly = {
+ .name = "connlimit",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connlimit_mt_check,
+ .match = connlimit_mt,
+ .matchsize = sizeof(struct xt_connlimit_info),
+ .usersize = offsetof(struct xt_connlimit_info, data),
+ .destroy = connlimit_mt_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init connlimit_mt_init(void)
+{
+ return xt_register_match(&connlimit_mt_reg);
+}
+
+static void __exit connlimit_mt_exit(void)
+{
+ xt_unregister_match(&connlimit_mt_reg);
+}
+
+module_init(connlimit_mt_init);
+module_exit(connlimit_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Number of connections matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_connlimit");
+MODULE_ALIAS("ip6t_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
new file mode 100644
index 000000000..ad3c033db
--- /dev/null
+++ b/net/netfilter/xt_connmark.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * xt_connmark - Netfilter module to operate on connection marks
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@medozas.de>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
+MODULE_DESCRIPTION("Xtables: connection mark operations");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_CONNMARK");
+MODULE_ALIAS("ip6t_CONNMARK");
+MODULE_ALIAS("ipt_connmark");
+MODULE_ALIAS("ip6t_connmark");
+
+static unsigned int
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
+{
+ enum ip_conntrack_info ctinfo;
+ u_int32_t new_targetmark;
+ struct nf_conn *ct;
+ u_int32_t newmark;
+ u_int32_t oldmark;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return XT_CONTINUE;
+
+ switch (info->mode) {
+ case XT_CONNMARK_SET:
+ oldmark = READ_ONCE(ct->mark);
+ newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ newmark >>= info->shift_bits;
+ else
+ newmark <<= info->shift_bits;
+
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+ case XT_CONNMARK_SAVE:
+ new_targetmark = (skb->mark & info->nfmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
+ else
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^
+ new_targetmark;
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+ case XT_CONNMARK_RESTORE:
+ new_targetmark = (READ_ONCE(ct->mark) & info->ctmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
+ else
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (skb->mark & ~info->nfmask) ^
+ new_targetmark;
+ skb->mark = newmark;
+ break;
+ }
+ return XT_CONTINUE;
+}
+
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connmark_tginfo1 *info = par->targinfo;
+ const struct xt_connmark_tginfo2 info2 = {
+ .ctmark = info->ctmark,
+ .ctmask = info->ctmask,
+ .nfmask = info->nfmask,
+ .mode = info->mode,
+ };
+
+ return connmark_tg_shift(skb, &info2);
+}
+
+static unsigned int
+connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_connmark_tginfo2 *info = par->targinfo;
+
+ return connmark_tg_shift(skb, info);
+}
+
+static int connmark_tg_check(const struct xt_tgchk_param *par)
+{
+ int ret;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static bool
+connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_connmark_mtinfo1 *info = par->matchinfo;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return false;
+
+ return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert;
+}
+
+static int connmark_mt_check(const struct xt_mtchk_param *par)
+{
+ int ret;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_target connmark_tg_reg[] __read_mostly = {
+ {
+ .name = "CONNMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg,
+ .targetsize = sizeof(struct xt_connmark_tginfo1),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CONNMARK",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg_v2,
+ .targetsize = sizeof(struct xt_connmark_tginfo2),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ }
+};
+
+static struct xt_match connmark_mt_reg __read_mostly = {
+ .name = "connmark",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_mt_check,
+ .match = connmark_mt,
+ .matchsize = sizeof(struct xt_connmark_mtinfo1),
+ .destroy = connmark_mt_destroy,
+ .me = THIS_MODULE,
+};
+
+static int __init connmark_mt_init(void)
+{
+ int ret;
+
+ ret = xt_register_targets(connmark_tg_reg,
+ ARRAY_SIZE(connmark_tg_reg));
+ if (ret < 0)
+ return ret;
+ ret = xt_register_match(&connmark_mt_reg);
+ if (ret < 0) {
+ xt_unregister_targets(connmark_tg_reg,
+ ARRAY_SIZE(connmark_tg_reg));
+ return ret;
+ }
+ return 0;
+}
+
+static void __exit connmark_mt_exit(void)
+{
+ xt_unregister_match(&connmark_mt_reg);
+ xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
+}
+
+module_init(connmark_mt_init);
+module_exit(connmark_mt_exit);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
new file mode 100644
index 000000000..ea299da24
--- /dev/null
+++ b/net/netfilter/xt_conntrack.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_conntrack - Netfilter module to match connection tracking
+ * information. (Superset of Rusty's minimalistic state match.)
+ *
+ * (C) 2001 Marc Boucher (marc@mbsi.ca).
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_conntrack.h>
+#include <net/netfilter/nf_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: connection tracking state match");
+MODULE_ALIAS("ipt_conntrack");
+MODULE_ALIAS("ip6t_conntrack");
+
+static bool
+conntrack_addrcmp(const union nf_inet_addr *kaddr,
+ const union nf_inet_addr *uaddr,
+ const union nf_inet_addr *umask, unsigned int l3proto)
+{
+ if (l3proto == NFPROTO_IPV4)
+ return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
+ else if (l3proto == NFPROTO_IPV6)
+ return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+ &uaddr->in6) == 0;
+ else
+ return false;
+}
+
+static inline bool
+conntrack_mt_origsrc(const struct nf_conn *ct,
+ const struct xt_conntrack_mtinfo2 *info,
+ u_int8_t family)
+{
+ return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+ &info->origsrc_addr, &info->origsrc_mask, family);
+}
+
+static inline bool
+conntrack_mt_origdst(const struct nf_conn *ct,
+ const struct xt_conntrack_mtinfo2 *info,
+ u_int8_t family)
+{
+ return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
+ &info->origdst_addr, &info->origdst_mask, family);
+}
+
+static inline bool
+conntrack_mt_replsrc(const struct nf_conn *ct,
+ const struct xt_conntrack_mtinfo2 *info,
+ u_int8_t family)
+{
+ return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
+ &info->replsrc_addr, &info->replsrc_mask, family);
+}
+
+static inline bool
+conntrack_mt_repldst(const struct nf_conn *ct,
+ const struct xt_conntrack_mtinfo2 *info,
+ u_int8_t family)
+{
+ return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
+ &info->repldst_addr, &info->repldst_mask, family);
+}
+
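+/* xtables inversion idiom: "(cond) ^ !invert" is true exactly when a
+ * requested test fails (cond false and not inverted, or cond true and
+ * inverted), so each check below bails out with false.
+ */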
+static inline bool
+ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
+ const struct nf_conn *ct)
+{
+ const struct nf_conntrack_tuple *tuple;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+ (nf_ct_protonum(ct) == info->l4proto) ^
+ !(info->invert_flags & XT_CONNTRACK_PROTO))
+ return false;
+
+ /* Shortcut to match all recognized protocols by using ->src.all. */
+ if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+ (tuple->src.u.all == info->origsrc_port) ^
+ !(info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+ (tuple->dst.u.all == info->origdst_port) ^
+ !(info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+ return false;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+ (tuple->src.u.all == info->replsrc_port) ^
+ !(info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+ (tuple->dst.u.all == info->repldst_port) ^
+ !(info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+ return false;
+
+ return true;
+}
+
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+ return (port >= min && port <= max) ^ invert;
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+ const struct nf_conn *ct)
+{
+ const struct nf_conntrack_tuple *tuple;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+ (nf_ct_protonum(ct) == info->l4proto) ^
+ !(info->invert_flags & XT_CONNTRACK_PROTO))
+ return false;
+
+ /* Shortcut to match all recognized protocols by using ->src.all. */
+ if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+ !port_match(info->origsrc_port, info->origsrc_port_high,
+ ntohs(tuple->src.u.all),
+ info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+ !port_match(info->origdst_port, info->origdst_port_high,
+ ntohs(tuple->dst.u.all),
+ info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+ return false;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+ !port_match(info->replsrc_port, info->replsrc_port_high,
+ ntohs(tuple->src.u.all),
+ info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+ return false;
+
+ if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+ !port_match(info->repldst_port, info->repldst_port_high,
+ ntohs(tuple->dst.u.all),
+ info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+ return false;
+
+ return true;
+}
+
+static bool
+conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
+ u16 state_mask, u16 status_mask)
+{
+ const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int statebit;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct)
+ statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ statebit = XT_CONNTRACK_STATE_UNTRACKED;
+ else
+ statebit = XT_CONNTRACK_STATE_INVALID;
+
+ if (info->match_flags & XT_CONNTRACK_STATE) {
+ if (ct != NULL) {
+ if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
+ statebit |= XT_CONNTRACK_STATE_SNAT;
+ if (test_bit(IPS_DST_NAT_BIT, &ct->status))
+ statebit |= XT_CONNTRACK_STATE_DNAT;
+ }
+ if (!!(state_mask & statebit) ^
+ !(info->invert_flags & XT_CONNTRACK_STATE))
+ return false;
+ }
+
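+	/* No conntrack entry: the only test that can have matched is
+	 * the state check above, so succeed iff it was requested.
+	 */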
+ if (ct == NULL)
+ return info->match_flags & XT_CONNTRACK_STATE;
+ if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
+ (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
+ !(info->invert_flags & XT_CONNTRACK_DIRECTION))
+ return false;
+
+ if (info->match_flags & XT_CONNTRACK_ORIGSRC)
+ if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^
+ !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
+ return false;
+
+ if (info->match_flags & XT_CONNTRACK_ORIGDST)
+ if (conntrack_mt_origdst(ct, info, xt_family(par)) ^
+ !(info->invert_flags & XT_CONNTRACK_ORIGDST))
+ return false;
+
+ if (info->match_flags & XT_CONNTRACK_REPLSRC)
+ if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^
+ !(info->invert_flags & XT_CONNTRACK_REPLSRC))
+ return false;
+
+ if (info->match_flags & XT_CONNTRACK_REPLDST)
+ if (conntrack_mt_repldst(ct, info, xt_family(par)) ^
+ !(info->invert_flags & XT_CONNTRACK_REPLDST))
+ return false;
+
+ if (par->match->revision != 3) {
+ if (!ct_proto_port_check(info, ct))
+ return false;
+ } else {
+ if (!ct_proto_port_check_v3(par->matchinfo, ct))
+ return false;
+ }
+
+ if ((info->match_flags & XT_CONNTRACK_STATUS) &&
+ (!!(status_mask & ct->status) ^
+ !(info->invert_flags & XT_CONNTRACK_STATUS)))
+ return false;
+
+ if (info->match_flags & XT_CONNTRACK_EXPIRES) {
+ unsigned long expires = nf_ct_expires(ct) / HZ;
+
+ if ((expires >= info->expires_min &&
+ expires <= info->expires_max) ^
+ !(info->invert_flags & XT_CONNTRACK_EXPIRES))
+ return false;
+ }
+ return true;
+}
+
+static bool
+conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
+
+ return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
+static bool
+conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
+
+ return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+
+ return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
+static int conntrack_mt_check(const struct xt_mtchk_param *par)
+{
+ int ret;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match conntrack_mt_reg[] __read_mostly = {
+ {
+ .name = "conntrack",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .matchsize = sizeof(struct xt_conntrack_mtinfo1),
+ .match = conntrack_mt_v1,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "conntrack",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .matchsize = sizeof(struct xt_conntrack_mtinfo2),
+ .match = conntrack_mt_v2,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "conntrack",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .matchsize = sizeof(struct xt_conntrack_mtinfo3),
+ .match = conntrack_mt_v3,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init conntrack_mt_init(void)
+{
+ return xt_register_matches(conntrack_mt_reg,
+ ARRAY_SIZE(conntrack_mt_reg));
+}
+
+static void __exit conntrack_mt_exit(void)
+{
+ xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg));
+}
+
+module_init(conntrack_mt_init);
+module_exit(conntrack_mt_exit);
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
new file mode 100644
index 000000000..3bdc302a0
--- /dev/null
+++ b/net/netfilter/xt_cpu.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match running CPU */
+
+/*
+ * Might be used to distribute connections across several daemons, if
+ * RPS (Receive Packet Steering) is enabled or the NIC is multiqueue
+ * capable, with each RX queue IRQ affined to one CPU (1:1 mapping).
+ */
+
+/* (C) 2010 Eric Dumazet
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/xt_cpu.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
+MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
+
+static int cpu_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_cpu_info *info = par->matchinfo;
+
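+	/* 'invert' is boolean; reject anything beyond bit 0. */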
+ if (info->invert & ~1)
+ return -EINVAL;
+ return 0;
+}
+
+static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cpu_info *info = par->matchinfo;
+
+ return (info->cpu == smp_processor_id()) ^ info->invert;
+}
+
+static struct xt_match cpu_mt_reg __read_mostly = {
+ .name = "cpu",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cpu_mt_check,
+ .match = cpu_mt,
+ .matchsize = sizeof(struct xt_cpu_info),
+ .me = THIS_MODULE,
+};
+
+static int __init cpu_mt_init(void)
+{
+ return xt_register_match(&cpu_mt_reg);
+}
+
+static void __exit cpu_mt_exit(void)
+{
+ xt_unregister_match(&cpu_mt_reg);
+}
+
+module_init(cpu_mt_init);
+module_exit(cpu_mt_exit);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
new file mode 100644
index 000000000..e5a13ecbe
--- /dev/null
+++ b/net/netfilter/xt_dccp.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * iptables module for DCCP protocol header matching
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/ip.h>
+#include <linux/dccp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dccp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DCCP protocol packet match");
+MODULE_ALIAS("ipt_dccp");
+MODULE_ALIAS("ip6t_dccp");
+
+#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+ || (!!((invflag) & (option)) ^ (cond)))
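+/* Succeeds when the sub-match named by 'option' was not requested in
+ * 'flag', or when 'cond' (XOR'ed with the corresponding 'invflag'
+ * bit) holds.
+ */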
+
+static unsigned char *dccp_optbuf;
+static DEFINE_SPINLOCK(dccp_buflock);
+
+static inline bool
+dccp_find_option(u_int8_t option,
+ const struct sk_buff *skb,
+ unsigned int protoff,
+ const struct dccp_hdr *dh,
+ bool *hotdrop)
+{
+	/* dccph_doff is 8 bits, ie. max 255 * 4 bytes of header + options */
+ const unsigned char *op;
+ unsigned int optoff = __dccp_hdr_len(dh);
+ unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
+ unsigned int i;
+
+ if (dh->dccph_doff * 4 < __dccp_hdr_len(dh))
+ goto invalid;
+
+ if (!optlen)
+ return false;
+
+ spin_lock_bh(&dccp_buflock);
+ op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf);
+ if (op == NULL) {
+ /* If we don't have the whole header, drop packet. */
+ goto partial;
+ }
+
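+	/* Walk the DCCP option TLVs: types 0 and 1 are single-byte
+	 * options, anything else carries its total length in the
+	 * following byte (treated as 1 if zero, to guarantee progress).
+	 */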
+ for (i = 0; i < optlen; ) {
+ if (op[i] == option) {
+ spin_unlock_bh(&dccp_buflock);
+ return true;
+ }
+
+ if (op[i] < 2)
+ i++;
+ else
+ i += op[i+1]?:1;
+ }
+
+ spin_unlock_bh(&dccp_buflock);
+ return false;
+
+partial:
+ spin_unlock_bh(&dccp_buflock);
+invalid:
+ *hotdrop = true;
+ return false;
+}
+
+
+static inline bool
+match_types(const struct dccp_hdr *dh, u_int16_t typemask)
+{
+ return typemask & (1 << dh->dccph_type);
+}
+
+static inline bool
+match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
+ const struct dccp_hdr *dh, bool *hotdrop)
+{
+ return dccp_find_option(option, skb, protoff, dh, hotdrop);
+}
+
+static bool
+dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_dccp_info *info = par->matchinfo;
+ const struct dccp_hdr *dh;
+ struct dccp_hdr _dh;
+
+ if (par->fragoff != 0)
+ return false;
+
+ dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
+ if (dh == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ return DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0]
+ && ntohs(dh->dccph_sport) <= info->spts[1],
+ XT_DCCP_SRC_PORTS, info->flags, info->invflags)
+ && DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0]
+ && ntohs(dh->dccph_dport) <= info->dpts[1],
+ XT_DCCP_DEST_PORTS, info->flags, info->invflags)
+ && DCCHECK(match_types(dh, info->typemask),
+ XT_DCCP_TYPE, info->flags, info->invflags)
+ && DCCHECK(match_option(info->option, skb, par->thoff, dh,
+ &par->hotdrop),
+ XT_DCCP_OPTION, info->flags, info->invflags);
+}
+
+static int dccp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_dccp_info *info = par->matchinfo;
+
+ if (info->flags & ~XT_DCCP_VALID_FLAGS)
+ return -EINVAL;
+ if (info->invflags & ~XT_DCCP_VALID_FLAGS)
+ return -EINVAL;
+ if (info->invflags & ~info->flags)
+ return -EINVAL;
+ return 0;
+}
+
+static struct xt_match dccp_mt_reg[] __read_mostly = {
+ {
+ .name = "dccp",
+ .family = NFPROTO_IPV4,
+ .checkentry = dccp_mt_check,
+ .match = dccp_mt,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dccp",
+ .family = NFPROTO_IPV6,
+ .checkentry = dccp_mt_check,
+ .match = dccp_mt,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init dccp_mt_init(void)
+{
+ int ret;
+
+	/* doff is 8 bits, so the maximum option size is (4 * 256). Don't
+	 * put this in BSS since DaveM is worried about locked TLBs for
+	 * kernel BSS. */
+ dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
+ if (!dccp_optbuf)
+ return -ENOMEM;
+ ret = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
+ if (ret)
+ goto out_kfree;
+ return ret;
+
+out_kfree:
+ kfree(dccp_optbuf);
+ return ret;
+}
+
+static void __exit dccp_mt_exit(void)
+{
+ xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
+ kfree(dccp_optbuf);
+}
+
+module_init(dccp_mt_init);
+module_exit(dccp_mt_exit);
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
new file mode 100644
index 000000000..9520dd000
--- /dev/null
+++ b/net/netfilter/xt_devgroup.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <linux/netfilter/xt_devgroup.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Device group match");
+MODULE_ALIAS("ipt_devgroup");
+MODULE_ALIAS("ip6t_devgroup");
+
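+/* A group test passes when (configured group ^ device group) has no
+ * bits set under the configured mask; the result is then XOR'ed with
+ * the corresponding INVERT flag.
+ */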
+static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_devgroup_info *info = par->matchinfo;
+
+ if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+ (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^
+ ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
+ return false;
+
+ if (info->flags & XT_DEVGROUP_MATCH_DST &&
+ (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^
+ ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
+ return false;
+
+ return true;
+}
+
+static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_devgroup_info *info = par->matchinfo;
+
+ if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
+ XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
+ return -EINVAL;
+
+ if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+ par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD)))
+ return -EINVAL;
+
+ if (info->flags & XT_DEVGROUP_MATCH_DST &&
+ par->hook_mask & ~((1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_match devgroup_mt_reg __read_mostly = {
+ .name = "devgroup",
+ .match = devgroup_mt,
+ .checkentry = devgroup_mt_checkentry,
+ .matchsize = sizeof(struct xt_devgroup_info),
+ .family = NFPROTO_UNSPEC,
+ .me = THIS_MODULE
+};
+
+static int __init devgroup_mt_init(void)
+{
+ return xt_register_match(&devgroup_mt_reg);
+}
+
+static void __exit devgroup_mt_exit(void)
+{
+ xt_unregister_match(&devgroup_mt_reg);
+}
+
+module_init(devgroup_mt_init);
+module_exit(devgroup_mt_exit);
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 000000000..fb0169a8f
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dscp.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_dscp");
+MODULE_ALIAS("ip6t_dscp");
+MODULE_ALIAS("ipt_tos");
+MODULE_ALIAS("ip6t_tos");
+
+static bool
+dscp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_dscp_info *info = par->matchinfo;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
+
+ return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static bool
+dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_dscp_info *info = par->matchinfo;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
+
+ return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int dscp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_dscp_info *info = par->matchinfo;
+
+ if (info->dscp > XT_DSCP_MAX)
+ return -EDOM;
+
+ return 0;
+}
+
+static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_tos_match_info *info = par->matchinfo;
+
+ if (xt_family(par) == NFPROTO_IPV4)
+ return ((ip_hdr(skb)->tos & info->tos_mask) ==
+ info->tos_value) ^ !!info->invert;
+ else
+ return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) ==
+ info->tos_value) ^ !!info->invert;
+}
+
+static struct xt_match dscp_mt_reg[] __read_mostly = {
+ {
+ .name = "dscp",
+ .family = NFPROTO_IPV4,
+ .checkentry = dscp_mt_check,
+ .match = dscp_mt,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dscp",
+ .family = NFPROTO_IPV6,
+ .checkentry = dscp_mt_check,
+ .match = dscp_mt6,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tos",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = tos_mt,
+ .matchsize = sizeof(struct xt_tos_match_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tos",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = tos_mt,
+ .matchsize = sizeof(struct xt_tos_match_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init dscp_mt_init(void)
+{
+ return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
+}
+
+static void __exit dscp_mt_exit(void)
+{
+ xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
+}
+
+module_init(dscp_mt_init);
+module_exit(dscp_mt_exit);
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
new file mode 100644
index 000000000..b96e8203a
--- /dev/null
+++ b/net/netfilter/xt_ecn.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ecn.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ecn");
+MODULE_ALIAS("ip6t_ecn");
+
+static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *einfo = par->matchinfo;
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+	/* In practice the TCP match has already done this, so it can't
+	 * fail. But let's be good citizens.
+	 */
+ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return false;
+
+ if (einfo->operation & XT_ECN_OP_MATCH_ECE) {
+ if (einfo->invert & XT_ECN_OP_MATCH_ECE) {
+ if (th->ece == 1)
+ return false;
+ } else {
+ if (th->ece == 0)
+ return false;
+ }
+ }
+
+ if (einfo->operation & XT_ECN_OP_MATCH_CWR) {
+ if (einfo->invert & XT_ECN_OP_MATCH_CWR) {
+ if (th->cwr == 1)
+ return false;
+ } else {
+ if (th->cwr == 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static inline bool match_ip(const struct sk_buff *skb,
+ const struct xt_ecn_info *einfo)
+{
+ return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^
+ !!(einfo->invert & XT_ECN_OP_MATCH_IP);
+}
+
+static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info))
+ return false;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ !match_tcp(skb, par))
+ return false;
+
+ return true;
+}
+
+static int ecn_mt_check4(const struct xt_mtchk_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+ const struct ipt_ip *ip = par->entryinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->invert & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
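+/* In IPv6 the 8-bit traffic class straddles two header bytes; its
+ * low (ECN) bits sit in the top nibble of flow_lbl[0], hence the
+ * shift by four below.
+ */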
+static inline bool match_ipv6(const struct sk_buff *skb,
+ const struct xt_ecn_info *einfo)
+{
+ return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) ==
+ einfo->ip_ect) ^
+ !!(einfo->invert & XT_ECN_OP_MATCH_IP);
+}
+
+static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info))
+ return false;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ !match_tcp(skb, par))
+ return false;
+
+ return true;
+}
+
+static int ecn_mt_check6(const struct xt_mtchk_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+ const struct ip6t_ip6 *ip = par->entryinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->invert & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) {
+ pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match ecn_mt_reg[] __read_mostly = {
+ {
+ .name = "ecn",
+ .family = NFPROTO_IPV4,
+ .match = ecn_mt4,
+ .matchsize = sizeof(struct xt_ecn_info),
+ .checkentry = ecn_mt_check4,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "ecn",
+ .family = NFPROTO_IPV6,
+ .match = ecn_mt6,
+ .matchsize = sizeof(struct xt_ecn_info),
+ .checkentry = ecn_mt_check6,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init ecn_mt_init(void)
+{
+ return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg));
+}
+
+static void __exit ecn_mt_exit(void)
+{
+ xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg));
+}
+
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
new file mode 100644
index 000000000..2a1c0ad0f
--- /dev/null
+++ b/net/netfilter/xt_esp.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match ESP parameters. */
+
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter/xt_esp.h>
+#include <linux/netfilter/x_tables.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
+MODULE_ALIAS("ipt_esp");
+MODULE_ALIAS("ip6t_esp");
+
+/* Returns true if the spi is matched by the range, false otherwise */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+ bool r;
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
+ r = (spi >= min && spi <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip_esp_hdr *eh;
+ struct ip_esp_hdr _esp;
+ const struct xt_esp *espinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp);
+ if (eh == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ pr_debug("Dropping evil ESP tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return spi_match(espinfo->spis[0], espinfo->spis[1], ntohl(eh->spi),
+ !!(espinfo->invflags & XT_ESP_INV_SPI));
+}
+
+static int esp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_esp *espinfo = par->matchinfo;
+
+ if (espinfo->invflags & ~XT_ESP_INV_MASK) {
+ pr_debug("unknown flags %X\n", espinfo->invflags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match esp_mt_reg[] __read_mostly = {
+ {
+ .name = "esp",
+ .family = NFPROTO_IPV4,
+ .checkentry = esp_mt_check,
+ .match = esp_mt,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "esp",
+ .family = NFPROTO_IPV6,
+ .checkentry = esp_mt_check,
+ .match = esp_mt,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init esp_mt_init(void)
+{
+ return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
+}
+
+static void __exit esp_mt_exit(void)
+{
+ xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
+}
+
+module_init(esp_mt_init);
+module_exit(esp_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
new file mode 100644
index 000000000..0859b8f76
--- /dev/null
+++ b/net/netfilter/xt_hashlimit.c
@@ -0,0 +1,1332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_hashlimit - Netfilter module to limit the number of packets per time
+ * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#endif
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <uapi/linux/netfilter/xt_hashlimit.h>
+
+#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
+ XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
+ XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+ XT_HASHLIMIT_RATE_MATCH)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
+MODULE_ALIAS("ipt_hashlimit");
+MODULE_ALIAS("ip6t_hashlimit");
+
+struct hashlimit_net {
+ struct hlist_head htables;
+ struct proc_dir_entry *ipt_hashlimit;
+ struct proc_dir_entry *ip6t_hashlimit;
+};
+
+static unsigned int hashlimit_net_id;
+static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
+{
+ return net_generic(net, hashlimit_net_id);
+}
+
+/* need to declare this at the top */
+static const struct seq_operations dl_seq_ops_v2;
+static const struct seq_operations dl_seq_ops_v1;
+static const struct seq_operations dl_seq_ops;
+
+/* hash table crap */
+struct dsthash_dst {
+ union {
+ struct {
+ __be32 src;
+ __be32 dst;
+ } ip;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ struct {
+ __be32 src[4];
+ __be32 dst[4];
+ } ip6;
+#endif
+ };
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+struct dsthash_ent {
+ /* static / read-only parts in the beginning */
+ struct hlist_node node;
+ struct dsthash_dst dst;
+
+ /* modified structure members in the end */
+ spinlock_t lock;
+ unsigned long expires; /* precalculated expiry time */
+ struct {
+ unsigned long prev; /* last modification */
+ union {
+ struct {
+ u_int64_t credit;
+ u_int64_t credit_cap;
+ u_int64_t cost;
+ };
+ struct {
+ u_int32_t interval, prev_window;
+ u_int64_t current_rate;
+ u_int64_t rate;
+ int64_t burst;
+ };
+ };
+ } rateinfo;
+ struct rcu_head rcu;
+};
+
+struct xt_hashlimit_htable {
+ struct hlist_node node; /* global list of all htables */
+ refcount_t use;
+ u_int8_t family;
+ bool rnd_initialized;
+
+ struct hashlimit_cfg3 cfg; /* config */
+
+ /* used internally */
+ spinlock_t lock; /* lock for list_head */
+ u_int32_t rnd; /* random seed for hash */
+ unsigned int count; /* number entries in table */
+ struct delayed_work gc_work;
+
+ /* seq_file stuff */
+ struct proc_dir_entry *pde;
+ const char *name;
+ struct net *net;
+
+ struct hlist_head hash[]; /* hashtable itself */
+};
+
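+/* Translate the older per-revision match configs into the current
+ * hashlimit_cfg3 layout (revision 3 is a straight copy).
+ */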
+static int
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
+{
+ if (revision == 1) {
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 2) {
+ struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 3) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg3));
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */
+static struct kmem_cache *hashlimit_cachep __read_mostly;
+
+static inline bool dst_cmp(const struct dsthash_ent *ent,
+ const struct dsthash_dst *b)
+{
+ return !memcmp(&ent->dst, b, sizeof(ent->dst));
+}
+
+static u_int32_t
+hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
+{
+ u_int32_t hash = jhash2((const u32 *)dst,
+ sizeof(*dst)/sizeof(u32),
+ ht->rnd);
+	/*
+	 * Instead of returning hash % ht->cfg.size (implying a divide),
+	 * we return the high 32 bits of (hash * ht->cfg.size), which
+	 * gives results in [0, cfg.size - 1] with the same distribution
+	 * but uses a multiply, less expensive than a divide.
+	 */
+ return reciprocal_scale(hash, ht->cfg.size);
+}
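+/* For illustration: reciprocal_scale(hash, size) computes
+ * (u32)(((u64)hash * size) >> 32), so with size = 16 every hash in
+ * [0, 2^28) lands in bucket 0, [2^28, 2^29) in bucket 1, and so on.
+ */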
+
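+/* Look up an entry; on success it is returned with its per-entry
+ * lock held, to be released by the caller once the rate info has
+ * been updated.
+ */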
+static struct dsthash_ent *
+dsthash_find(const struct xt_hashlimit_htable *ht,
+ const struct dsthash_dst *dst)
+{
+ struct dsthash_ent *ent;
+ u_int32_t hash = hash_dst(ht, dst);
+
+ if (!hlist_empty(&ht->hash[hash])) {
+ hlist_for_each_entry_rcu(ent, &ht->hash[hash], node)
+ if (dst_cmp(ent, dst)) {
+ spin_lock(&ent->lock);
+ return ent;
+ }
+ }
+ return NULL;
+}
+
+/* allocate dsthash_ent, initialize dst, put in htable and lock it */
+static struct dsthash_ent *
+dsthash_alloc_init(struct xt_hashlimit_htable *ht,
+ const struct dsthash_dst *dst, bool *race)
+{
+ struct dsthash_ent *ent;
+
+ spin_lock(&ht->lock);
+
+	/* Two or more packets may race to create the same entry in the
+	 * hashtable; double-check whether this packet lost the race.
+	 */
+ ent = dsthash_find(ht, dst);
+ if (ent != NULL) {
+ spin_unlock(&ht->lock);
+ *race = true;
+ return ent;
+ }
+
+	/* initialize the hash seed with a random value at the time we
+	 * allocate the first hashtable entry */
+ if (unlikely(!ht->rnd_initialized)) {
+ get_random_bytes(&ht->rnd, sizeof(ht->rnd));
+ ht->rnd_initialized = true;
+ }
+
+ if (ht->cfg.max && ht->count >= ht->cfg.max) {
+ /* FIXME: do something. question is what.. */
+ net_err_ratelimited("max count of %u reached\n", ht->cfg.max);
+ ent = NULL;
+ } else
+ ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
+ if (ent) {
+ memcpy(&ent->dst, dst, sizeof(ent->dst));
+ spin_lock_init(&ent->lock);
+
+ spin_lock(&ent->lock);
+ hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]);
+ ht->count++;
+ }
+ spin_unlock(&ht->lock);
+ return ent;
+}
+
+static void dsthash_free_rcu(struct rcu_head *head)
+{
+ struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu);
+
+ kmem_cache_free(hashlimit_cachep, ent);
+}
+
+static inline void
+dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
+{
+ hlist_del_rcu(&ent->node);
+ call_rcu(&ent->rcu, dsthash_free_rcu);
+ ht->count--;
+}
+static void htable_gc(struct work_struct *work);
+
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
+ const char *name, u_int8_t family,
+ struct xt_hashlimit_htable **out_hinfo,
+ int revision)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+ struct xt_hashlimit_htable *hinfo;
+ const struct seq_operations *ops;
+ unsigned int size, i;
+ unsigned long nr_pages = totalram_pages();
+ int ret;
+
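+	/* If no table size was given, scale the bucket count with
+	 * available RAM, capped at 8192 buckets above 1 GiB and
+	 * floored at 16.
+	 */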
+ if (cfg->size) {
+ size = cfg->size;
+ } else {
+ size = (nr_pages << PAGE_SHIFT) / 16384 /
+ sizeof(struct hlist_head);
+ if (nr_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
+ size = 8192;
+ if (size < 16)
+ size = 16;
+ }
+ /* FIXME: don't use vmalloc() here or anywhere else -HW */
+ hinfo = vmalloc(struct_size(hinfo, hash, size));
+ if (hinfo == NULL)
+ return -ENOMEM;
+ *out_hinfo = hinfo;
+
+ /* copy match config into hashtable config */
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
+ if (ret) {
+ vfree(hinfo);
+ return ret;
+ }
+
+ hinfo->cfg.size = size;
+ if (hinfo->cfg.max == 0)
+ hinfo->cfg.max = 8 * hinfo->cfg.size;
+ else if (hinfo->cfg.max < hinfo->cfg.size)
+ hinfo->cfg.max = hinfo->cfg.size;
+
+ for (i = 0; i < hinfo->cfg.size; i++)
+ INIT_HLIST_HEAD(&hinfo->hash[i]);
+
+ refcount_set(&hinfo->use, 1);
+ hinfo->count = 0;
+ hinfo->family = family;
+ hinfo->rnd_initialized = false;
+ hinfo->name = kstrdup(name, GFP_KERNEL);
+ if (!hinfo->name) {
+ vfree(hinfo);
+ return -ENOMEM;
+ }
+ spin_lock_init(&hinfo->lock);
+
+ switch (revision) {
+ case 1:
+ ops = &dl_seq_ops_v1;
+ break;
+ case 2:
+ ops = &dl_seq_ops_v2;
+ break;
+ default:
+ ops = &dl_seq_ops;
+ }
+
+ hinfo->pde = proc_create_seq_data(name, 0,
+ (family == NFPROTO_IPV4) ?
+ hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
+ ops, hinfo);
+ if (hinfo->pde == NULL) {
+ kfree(hinfo->name);
+ vfree(hinfo);
+ return -ENOMEM;
+ }
+ hinfo->net = net;
+
+ INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc);
+ queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work,
+ msecs_to_jiffies(hinfo->cfg.gc_interval));
+
+ hlist_add_head(&hinfo->node, &hashlimit_net->htables);
+
+ return 0;
+}
+
+static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, bool select_all)
+{
+ unsigned int i;
+
+ for (i = 0; i < ht->cfg.size; i++) {
+ struct dsthash_ent *dh;
+ struct hlist_node *n;
+
+ spin_lock_bh(&ht->lock);
+ hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
+ if (time_after_eq(jiffies, dh->expires) || select_all)
+ dsthash_free(ht, dh);
+ }
+ spin_unlock_bh(&ht->lock);
+ cond_resched();
+ }
+}
+
+static void htable_gc(struct work_struct *work)
+{
+ struct xt_hashlimit_htable *ht;
+
+ ht = container_of(work, struct xt_hashlimit_htable, gc_work.work);
+
+ htable_selective_cleanup(ht, false);
+
+ queue_delayed_work(system_power_efficient_wq,
+ &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval));
+}
+
+static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net);
+ struct proc_dir_entry *parent;
+
+ if (hinfo->family == NFPROTO_IPV4)
+ parent = hashlimit_net->ipt_hashlimit;
+ else
+ parent = hashlimit_net->ip6t_hashlimit;
+
+ if (parent != NULL)
+ remove_proc_entry(hinfo->name, parent);
+}
+
+static struct xt_hashlimit_htable *htable_find_get(struct net *net,
+ const char *name,
+ u_int8_t family)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+ struct xt_hashlimit_htable *hinfo;
+
+ hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
+ if (!strcmp(name, hinfo->name) &&
+ hinfo->family == family) {
+ refcount_inc(&hinfo->use);
+ return hinfo;
+ }
+ }
+ return NULL;
+}
+
+static void htable_put(struct xt_hashlimit_htable *hinfo)
+{
+ if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
+ hlist_del(&hinfo->node);
+ htable_remove_proc_entry(hinfo);
+ mutex_unlock(&hashlimit_mutex);
+
+ cancel_delayed_work_sync(&hinfo->gc_work);
+ htable_selective_cleanup(hinfo, true);
+ kfree(hinfo->name);
+ vfree(hinfo);
+ }
+}
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+   this algorithm. The `average rate' in jiffies becomes your initial
+   amount of credit `credit' and the most credit you can ever have
+   `credit_cap'. The `peak rate' becomes the cost of passing the
+   test, `cost'.
+
+   `prev' tracks the last packet hit: you gain one credit per jiffy.
+   If your balance grows beyond `credit_cap', the extra credit is
+   discarded. Every time the match passes, you lose `cost' credits;
+   if you don't have that many, the test fails.
+
+   See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+   To get the maximum range, we multiply by this factor (ie. you get N
+   credits per jiffy). We want to allow a rate as low as 1 per day
+   (the slowest that userspace tools allow), which means
+   CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32.
+*/
+#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1)
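+/* For illustration: for x = 497 (0b111110001) the repeated shift-or
+ * yields 0b111111111 = 511, and (511 >> 1) + 1 = 256, the largest
+ * power of two not exceeding 497.
+ */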
+
+#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ)
+#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1)
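+
+/* A worked example, assuming HZ=100: MAX_CPJ_v1 is
+ * 0xFFFFFFFF / (100*60*60*24) ~= 497, so CREDITS_PER_JIFFY_v1 becomes
+ * 256, and even a 1/day rule can accrue 256 credits every jiffy for a
+ * whole day without overflowing a u32.
+ */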
+
+/* in byte mode, the lowest possible rate is one packet/second.
+ * credit_cap is used as a counter that tells us how many times we can
+ * refill the "credits available" counter when it becomes empty.
+ */
+#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ)
+#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES)
+
+static u32 xt_hashlimit_len_to_chunks(u32 len)
+{
+ return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1;
+}
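+/* Byte mode accounts in chunks of 2^XT_HASHLIMIT_BYTE_SHIFT bytes;
+ * the "+ 1" above makes even a zero-length packet cost one chunk.
+ */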
+
+/* Precision saver. */
+static u64 user2credits(u64 user, int revision)
+{
+ u64 scale = (revision == 1) ?
+ XT_HASHLIMIT_SCALE : XT_HASHLIMIT_SCALE_v2;
+ u64 cpj = (revision == 1) ?
+ CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+
+ /* Avoid overflow: divide the constant operands first */
+ if (scale >= HZ * cpj)
+ return div64_u64(user, div64_u64(scale, HZ * cpj));
+
+ return user * div64_u64(HZ * cpj, scale);
+}
+
+static u32 user2credits_byte(u32 user)
+{
+ u64 us = user;
+ us *= HZ * CREDITS_PER_JIFFY_BYTES;
+ return (u32) (us >> 32);
+}
+
+static u64 user2rate(u64 user)
+{
+ if (user != 0) {
+ return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+ } else {
+ pr_info_ratelimited("invalid rate from userspace: %llu\n",
+ user);
+ return 0;
+ }
+}
+
+static u64 user2rate_bytes(u32 user)
+{
+ u64 r;
+
+ r = user ? U32_MAX / user : U32_MAX;
+ return (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
+}
+
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
+ u32 mode, int revision)
+{
+ unsigned long delta = now - dh->rateinfo.prev;
+ u64 cap, cpj;
+
+ if (delta == 0)
+ return;
+
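+	/* Rate-match mode: once a full interval has elapsed, remember
+	 * whether the closing window exceeded the configured rate and
+	 * start counting the next window from zero.
+	 */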
+ if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+ u64 interval = dh->rateinfo.interval * HZ;
+
+ if (delta < interval)
+ return;
+
+ dh->rateinfo.prev = now;
+ dh->rateinfo.prev_window =
+ ((dh->rateinfo.current_rate * interval) >
+ (delta * dh->rateinfo.rate));
+ dh->rateinfo.current_rate = 0;
+
+ return;
+ }
+
+ dh->rateinfo.prev = now;
+
+ if (mode & XT_HASHLIMIT_BYTES) {
+ u64 tmp = dh->rateinfo.credit;
+ dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
+ cap = CREDITS_PER_JIFFY_BYTES * HZ;
+ if (tmp >= dh->rateinfo.credit) {/* overflow */
+ dh->rateinfo.credit = cap;
+ return;
+ }
+ } else {
+ cpj = (revision == 1) ?
+ CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+ dh->rateinfo.credit += delta * cpj;
+ cap = dh->rateinfo.credit_cap;
+ }
+ if (dh->rateinfo.credit > cap)
+ dh->rateinfo.credit = cap;
+}
+
+static void rateinfo_init(struct dsthash_ent *dh,
+ struct xt_hashlimit_htable *hinfo, int revision)
+{
+ dh->rateinfo.prev = jiffies;
+ if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+ dh->rateinfo.prev_window = 0;
+ dh->rateinfo.current_rate = 0;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.rate =
+ user2rate_bytes((u32)hinfo->cfg.avg);
+ if (hinfo->cfg.burst)
+ dh->rateinfo.burst =
+ hinfo->cfg.burst * dh->rateinfo.rate;
+ else
+ dh->rateinfo.burst = dh->rateinfo.rate;
+ } else {
+ dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+ dh->rateinfo.burst =
+ hinfo->cfg.burst + dh->rateinfo.rate;
+ }
+ dh->rateinfo.interval = hinfo->cfg.interval;
+ } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+ dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
+ dh->rateinfo.credit_cap = hinfo->cfg.burst;
+ } else {
+ dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst, revision);
+ dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision);
+ dh->rateinfo.credit_cap = dh->rateinfo.credit;
+ }
+}
+
+static inline __be32 maskl(__be32 a, unsigned int l)
+{
+ return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0;
+}
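+/* e.g. maskl(addr, 24) keeps the top 24 bits of a network-order
+ * IPv4 address: 192.168.7.9/24 becomes 192.168.7.0.
+ */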
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
+{
+ switch (p) {
+ case 0 ... 31:
+ i[0] = maskl(i[0], p);
+ i[1] = i[2] = i[3] = 0;
+ break;
+ case 32 ... 63:
+ i[1] = maskl(i[1], p - 32);
+ i[2] = i[3] = 0;
+ break;
+ case 64 ... 95:
+ i[2] = maskl(i[2], p - 64);
+ i[3] = 0;
+ break;
+ case 96 ... 127:
+ i[3] = maskl(i[3], p - 96);
+ break;
+ case 128:
+ break;
+ }
+}
+#endif
+
+static int
+hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
+ struct dsthash_dst *dst,
+ const struct sk_buff *skb, unsigned int protoff)
+{
+ __be16 _ports[2], *ports;
+ u8 nexthdr;
+ int poff;
+
+ memset(dst, 0, sizeof(*dst));
+
+ switch (hinfo->family) {
+ case NFPROTO_IPV4:
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
+ dst->ip.dst = maskl(ip_hdr(skb)->daddr,
+ hinfo->cfg.dstmask);
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
+ dst->ip.src = maskl(ip_hdr(skb)->saddr,
+ hinfo->cfg.srcmask);
+
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ nexthdr = ip_hdr(skb)->protocol;
+ break;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ case NFPROTO_IPV6:
+ {
+ __be16 frag_off;
+
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
+ memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(dst->ip6.dst));
+ hashlimit_ipv6_mask(dst->ip6.dst, hinfo->cfg.dstmask);
+ }
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) {
+ memcpy(&dst->ip6.src, &ipv6_hdr(skb)->saddr,
+ sizeof(dst->ip6.src));
+ hashlimit_ipv6_mask(dst->ip6.src, hinfo->cfg.srcmask);
+ }
+
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
+ if ((int)protoff < 0)
+ return -1;
+ break;
+ }
+#endif
+ default:
+ BUG();
+ return 0;
+ }
+
+ poff = proto_ports_offset(nexthdr);
+ if (poff >= 0) {
+ ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
+ &_ports);
+ } else {
+ _ports[0] = _ports[1] = 0;
+ ports = _ports;
+ }
+ if (!ports)
+ return -1;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT)
+ dst->src_port = ports[0];
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT)
+ dst->dst_port = ports[1];
+ return 0;
+}
+
+static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
+{
+ u64 tmp = xt_hashlimit_len_to_chunks(len);
+ tmp = tmp * dh->rateinfo.cost;
+
+ if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ))
+ tmp = CREDITS_PER_JIFFY_BYTES * HZ;
+
+ if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) {
+ dh->rateinfo.credit_cap--;
+ dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+ }
+ return (u32) tmp;
+}
+
+static bool
+hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
+ struct xt_hashlimit_htable *hinfo,
+ const struct hashlimit_cfg3 *cfg, int revision)
+{
+ unsigned long now = jiffies;
+ struct dsthash_ent *dh;
+ struct dsthash_dst dst;
+ bool race = false;
+ u64 cost;
+
+ if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
+ goto hotdrop;
+
+ local_bh_disable();
+ dh = dsthash_find(hinfo, &dst);
+ if (dh == NULL) {
+ dh = dsthash_alloc_init(hinfo, &dst, &race);
+ if (dh == NULL) {
+ local_bh_enable();
+ goto hotdrop;
+ } else if (race) {
+ /* Already got an entry, update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
+ } else {
+ dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_init(dh, hinfo, revision);
+ }
+ } else {
+ /* update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
+ }
+
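+	/* In rate-match mode the packet matches while the previous
+	 * window stayed under the rate and the current window is still
+	 * within the burst allowance.
+	 */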
+ if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+ dh->rateinfo.current_rate += cost;
+
+ if (!dh->rateinfo.prev_window &&
+ (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+ spin_unlock(&dh->lock);
+ local_bh_enable();
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
+ } else {
+ goto overlimit;
+ }
+ }
+
+ if (cfg->mode & XT_HASHLIMIT_BYTES)
+ cost = hashlimit_byte_cost(skb->len, dh);
+ else
+ cost = dh->rateinfo.cost;
+
+ if (dh->rateinfo.credit >= cost) {
+ /* below the limit */
+ dh->rateinfo.credit -= cost;
+ spin_unlock(&dh->lock);
+ local_bh_enable();
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
+ }
+
+overlimit:
+ spin_unlock(&dh->lock);
+ local_bh_enable();
+ /* default match is underlimit - so over the limit, we need to invert */
+ return cfg->mode & XT_HASHLIMIT_INVERT;
+
+ hotdrop:
+ par->hotdrop = true;
+ return false;
+}
+
+static bool
+hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 1);
+}
+
+static bool
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
+}
+
+#define HASHLIMIT_MAX_SIZE 1048576
+
+static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
+ struct xt_hashlimit_htable **hinfo,
+ struct hashlimit_cfg3 *cfg,
+ const char *name, int revision)
+{
+ struct net *net = par->net;
+ int ret;
+
+ if (cfg->gc_interval == 0 || cfg->expire == 0)
+ return -EINVAL;
+ if (cfg->size > HASHLIMIT_MAX_SIZE) {
+ cfg->size = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+ }
+ if (cfg->max > HASHLIMIT_MAX_SIZE) {
+ cfg->max = HASHLIMIT_MAX_SIZE;
+ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+ }
+ if (par->family == NFPROTO_IPV4) {
+ if (cfg->srcmask > 32 || cfg->dstmask > 32)
+ return -EINVAL;
+ } else {
+ if (cfg->srcmask > 128 || cfg->dstmask > 128)
+ return -EINVAL;
+ }
+
+ if (cfg->mode & ~XT_HASHLIMIT_ALL) {
+ pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n",
+ cfg->mode);
+ return -EINVAL;
+ }
+
+ /* Check for overflow. */
+ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ if (cfg->avg == 0 || cfg->avg > U32_MAX) {
+ pr_info_ratelimited("invalid rate\n");
+ return -ERANGE;
+ }
+
+ if (cfg->interval == 0) {
+ pr_info_ratelimited("invalid interval\n");
+ return -EINVAL;
+ }
+ } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (user2credits_byte(cfg->avg) == 0) {
+ pr_info_ratelimited("overflow, rate too high: %llu\n",
+ cfg->avg);
+ return -EINVAL;
+ }
+ } else if (cfg->burst == 0 ||
+ user2credits(cfg->avg * cfg->burst, revision) <
+ user2credits(cfg->avg, revision)) {
+ pr_info_ratelimited("overflow, try lower: %llu/%llu\n",
+ cfg->avg, cfg->burst);
+ return -ERANGE;
+ }
+
+ mutex_lock(&hashlimit_mutex);
+ *hinfo = htable_find_get(net, name, par->family);
+ if (*hinfo == NULL) {
+ ret = htable_create(net, cfg, name, par->family,
+ hinfo, revision);
+ if (ret < 0) {
+ mutex_unlock(&hashlimit_mutex);
+ return ret;
+ }
+ }
+ mutex_unlock(&hashlimit_mutex);
+
+ return 0;
+}
+
+static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 1);
+}
+
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ int ret;
+
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
+ info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
+}
+
+static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
+}
+
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
+}
+
+static struct xt_match hashlimit_mt_reg[] __read_mostly = {
+ {
+ .name = "hashlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt_v1,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo),
+ .checkentry = hashlimit_mt_check_v1,
+ .destroy = hashlimit_mt_destroy_v1,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 2,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt_v2,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo2),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
+ .checkentry = hashlimit_mt_check,
+ .destroy = hashlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "hashlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt_v1,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo1, hinfo),
+ .checkentry = hashlimit_mt_check_v1,
+ .destroy = hashlimit_mt_destroy_v1,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt_v2,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo2),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
+ .checkentry = hashlimit_mt_check,
+ .destroy = hashlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+/* PROC stuff */
+static void *dl_seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(htable->lock)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket;
+
+ spin_lock_bh(&htable->lock);
+ if (*pos >= htable->cfg.size)
+ return NULL;
+
+ bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
+ if (!bucket)
+ return ERR_PTR(-ENOMEM);
+
+ *bucket = *pos;
+ return bucket;
+}
+
+static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket = v;
+
+ *pos = ++(*bucket);
+ if (*pos >= htable->cfg.size) {
+ kfree(v);
+ return NULL;
+ }
+ return bucket;
+}
+
+static void dl_seq_stop(struct seq_file *s, void *v)
+ __releases(htable->lock)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket = v;
+
+ if (!IS_ERR(bucket))
+ kfree(bucket);
+ spin_unlock_bh(&htable->lock);
+}
+
+static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ switch (family) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n",
+ (long)(ent->expires - jiffies)/HZ,
+ &ent->dst.ip.src,
+ ntohs(ent->dst.src_port),
+ &ent->dst.ip.dst,
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+ break;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ case NFPROTO_IPV6:
+ seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n",
+ (long)(ent->expires - jiffies)/HZ,
+ &ent->dst.ip6.src,
+ ntohs(ent->dst.src_port),
+ &ent->dst.ip6.dst,
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+ break;
+#endif
+ default:
+ BUG();
+ }
+}
+
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
+static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v2(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
+static int dl_seq_show_v1(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket = v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v1(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
+static int dl_seq_show(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
+ unsigned int *bucket = v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
+static const struct seq_operations dl_seq_ops_v1 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v1
+};
+
+static const struct seq_operations dl_seq_ops_v2 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v2
+};
+
+static const struct seq_operations dl_seq_ops = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show
+};
+
+static int __net_init hashlimit_proc_net_init(struct net *net)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);
+ if (!hashlimit_net->ipt_hashlimit)
+ return -ENOMEM;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);
+ if (!hashlimit_net->ip6t_hashlimit) {
+ remove_proc_entry("ipt_hashlimit", net->proc_net);
+ return -ENOMEM;
+ }
+#endif
+ return 0;
+}
+
+static void __net_exit hashlimit_proc_net_exit(struct net *net)
+{
+ struct xt_hashlimit_htable *hinfo;
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+	/* hashlimit_net_exit() is called before hashlimit_mt_destroy().
+	 * Make sure the parent ipt_hashlimit and ip6t_hashlimit proc
+	 * entries are empty before trying to remove them.
+	 */
+ mutex_lock(&hashlimit_mutex);
+ hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
+ htable_remove_proc_entry(hinfo);
+ hashlimit_net->ipt_hashlimit = NULL;
+ hashlimit_net->ip6t_hashlimit = NULL;
+ mutex_unlock(&hashlimit_mutex);
+
+ remove_proc_entry("ipt_hashlimit", net->proc_net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ remove_proc_entry("ip6t_hashlimit", net->proc_net);
+#endif
+}
+
+static int __net_init hashlimit_net_init(struct net *net)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ INIT_HLIST_HEAD(&hashlimit_net->htables);
+ return hashlimit_proc_net_init(net);
+}
+
+static void __net_exit hashlimit_net_exit(struct net *net)
+{
+ hashlimit_proc_net_exit(net);
+}
+
+static struct pernet_operations hashlimit_net_ops = {
+ .init = hashlimit_net_init,
+ .exit = hashlimit_net_exit,
+ .id = &hashlimit_net_id,
+ .size = sizeof(struct hashlimit_net),
+};
+
+static int __init hashlimit_mt_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&hashlimit_net_ops);
+ if (err < 0)
+ return err;
+ err = xt_register_matches(hashlimit_mt_reg,
+ ARRAY_SIZE(hashlimit_mt_reg));
+ if (err < 0)
+ goto err1;
+
+ err = -ENOMEM;
+ hashlimit_cachep = kmem_cache_create("xt_hashlimit",
+ sizeof(struct dsthash_ent), 0, 0,
+ NULL);
+ if (!hashlimit_cachep) {
+ pr_warn("unable to create slab cache\n");
+ goto err2;
+ }
+ return 0;
+
+err2:
+ xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+err1:
+ unregister_pernet_subsys(&hashlimit_net_ops);
+ return err;
+
+}
+
+static void __exit hashlimit_mt_exit(void)
+{
+ xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+ unregister_pernet_subsys(&hashlimit_net_ops);
+
+ rcu_barrier();
+ kmem_cache_destroy(hashlimit_cachep);
+}
+
+module_init(hashlimit_mt_init);
+module_exit(hashlimit_mt_exit);
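
The credit test in hashlimit_mt_common() above is a plain token-bucket
decision: each matching packet spends `cost` credits and the match fails
once the bucket runs dry. A minimal userspace sketch of just that step,
assuming nothing from the module (struct bucket and the constants are
invented for the demo):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for one hashlimit destination entry. */
struct bucket {
	uint64_t credit;	/* credit left in this interval */
	uint64_t cost;		/* credit one matching packet consumes */
};

/* Mirrors the "credit >= cost" branch: spend credit while under limit. */
static bool under_limit(struct bucket *b)
{
	if (b->credit >= b->cost) {
		b->credit -= b->cost;
		return true;
	}
	return false;	/* over limit: credit is left untouched */
}

int main(void)
{
	struct bucket b = { .credit = 3, .cost = 1 };
	int i;

	for (i = 0; i < 5; i++)	/* packets 0-2 match, 3-4 are over */
		printf("packet %d: %s\n", i,
		       under_limit(&b) ? "under limit" : "over limit");
	return 0;
}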
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
new file mode 100644
index 000000000..a5a167f94
--- /dev/null
+++ b/net/netfilter/xt_helper.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* iptables module to match on related connections */
+/*
+ * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Related connection matching");
+MODULE_ALIAS("ipt_helper");
+MODULE_ALIAS("ip6t_helper");
+
+
+static bool
+helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_helper_info *info = par->matchinfo;
+ const struct nf_conn *ct;
+ const struct nf_conn_help *master_help;
+ const struct nf_conntrack_helper *helper;
+ enum ip_conntrack_info ctinfo;
+ bool ret = info->invert;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || !ct->master)
+ return ret;
+
+ master_help = nfct_help(ct->master);
+ if (!master_help)
+ return ret;
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(master_help->helper);
+ if (!helper)
+ return ret;
+
+ if (info->name[0] == '\0')
+ ret = !ret;
+ else
+ ret ^= !strncmp(helper->name, info->name,
+ strlen(helper->name));
+ return ret;
+}
+
+static int helper_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_helper_info *info = par->matchinfo;
+ int ret;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+ }
+ info->name[sizeof(info->name) - 1] = '\0';
+ return 0;
+}
+
+static void helper_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match helper_mt_reg __read_mostly = {
+ .name = "helper",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = helper_mt_check,
+ .match = helper_mt,
+ .destroy = helper_mt_destroy,
+ .matchsize = sizeof(struct xt_helper_info),
+ .me = THIS_MODULE,
+};
+
+static int __init helper_mt_init(void)
+{
+ return xt_register_match(&helper_mt_reg);
+}
+
+static void __exit helper_mt_exit(void)
+{
+ xt_unregister_match(&helper_mt_reg);
+}
+
+module_init(helper_mt_init);
+module_exit(helper_mt_exit);
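
The invert handling in helper_mt() above is easy to misread: the result
starts out equal to the invert flag, an empty pattern flips it
unconditionally, and otherwise the name comparison is XORed in. A
self-contained sketch of that final step (helper_matches() is a name
made up for the demo):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Empty pattern matches any helper; otherwise compare against the
 * helper's own name length, then fold in the invert flag. */
static bool helper_matches(const char *helper, const char *pattern,
			   bool invert)
{
	bool ret = invert;

	if (pattern[0] == '\0')
		return !ret;
	return ret ^ !strncmp(helper, pattern, strlen(helper));
}

int main(void)
{
	printf("%d\n", helper_matches("ftp", "ftp", false));	/* 1 */
	printf("%d\n", helper_matches("ftp", "sip", false));	/* 0 */
	printf("%d\n", helper_matches("ftp", "ftp", true));	/* 0 */
	return 0;
}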
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
new file mode 100644
index 000000000..c1a70f8f0
--- /dev/null
+++ b/net/netfilter/xt_hl.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IP tables module for matching the value of the TTL
+ * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
+ *
+ * Hop Limit matching module
+ * (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ */
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_ttl.h>
+#include <linux/netfilter_ipv6/ip6t_hl.h>
+
+MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
+MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ttl");
+MODULE_ALIAS("ip6t_hl");
+
+static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ipt_ttl_info *info = par->matchinfo;
+ const u8 ttl = ip_hdr(skb)->ttl;
+
+ switch (info->mode) {
+ case IPT_TTL_EQ:
+ return ttl == info->ttl;
+ case IPT_TTL_NE:
+ return ttl != info->ttl;
+ case IPT_TTL_LT:
+ return ttl < info->ttl;
+ case IPT_TTL_GT:
+ return ttl > info->ttl;
+ }
+
+ return false;
+}
+
+static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip6t_hl_info *info = par->matchinfo;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+ switch (info->mode) {
+ case IP6T_HL_EQ:
+ return ip6h->hop_limit == info->hop_limit;
+ case IP6T_HL_NE:
+ return ip6h->hop_limit != info->hop_limit;
+ case IP6T_HL_LT:
+ return ip6h->hop_limit < info->hop_limit;
+ case IP6T_HL_GT:
+ return ip6h->hop_limit > info->hop_limit;
+ }
+
+ return false;
+}
+
+static struct xt_match hl_mt_reg[] __read_mostly = {
+ {
+ .name = "ttl",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = ttl_mt,
+ .matchsize = sizeof(struct ipt_ttl_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hl",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .match = hl_mt6,
+ .matchsize = sizeof(struct ip6t_hl_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init hl_mt_init(void)
+{
+ return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
+}
+
+static void __exit hl_mt_exit(void)
+{
+ xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
+}
+
+module_init(hl_mt_init);
+module_exit(hl_mt_exit);
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
new file mode 100644
index 000000000..472da639a
--- /dev/null
+++ b/net/netfilter/xt_ipcomp.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Kernel module to match IPComp parameters for IPv4 and IPv6
+ *
+ * Copyright (C) 2013 WindRiver
+ *
+ * Author:
+ * Fan Du <fan.du@windriver.com>
+ *
+ * Based on:
+ * net/netfilter/xt_esp.c
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter/xt_ipcomp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
+MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
+MODULE_ALIAS("ipt_ipcomp");
+MODULE_ALIAS("ip6t_ipcomp");
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+ bool r;
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
+ r = (spi >= min && spi <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ip_comp_hdr _comphdr;
+ const struct ip_comp_hdr *chdr;
+ const struct xt_ipcomp *compinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr);
+ if (chdr == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ pr_debug("Dropping evil IPComp tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return spi_match(compinfo->spis[0], compinfo->spis[1],
+ ntohs(chdr->cpi),
+ !!(compinfo->invflags & XT_IPCOMP_INV_SPI));
+}
+
+static int comp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_ipcomp *compinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
+ pr_info_ratelimited("unknown flags %X\n", compinfo->invflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_match comp_mt_reg[] __read_mostly = {
+ {
+ .name = "ipcomp",
+ .family = NFPROTO_IPV4,
+ .match = comp_mt,
+ .matchsize = sizeof(struct xt_ipcomp),
+ .proto = IPPROTO_COMP,
+ .checkentry = comp_mt_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "ipcomp",
+ .family = NFPROTO_IPV6,
+ .match = comp_mt,
+ .matchsize = sizeof(struct xt_ipcomp),
+ .proto = IPPROTO_COMP,
+ .checkentry = comp_mt_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init comp_mt_init(void)
+{
+ return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+}
+
+static void __exit comp_mt_exit(void)
+{
+ xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+}
+
+module_init(comp_mt_init);
+module_exit(comp_mt_exit);
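
spi_match() above folds the CPI range test and the optional inversion
into a single expression. The same predicate reduced to a userspace
sketch (in_range() is a stand-in name):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True when v lies in [min, max], with the result optionally inverted. */
static bool in_range(uint32_t min, uint32_t max, uint32_t v, bool invert)
{
	return (v >= min && v <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", in_range(10, 20, 15, false));	/* 1: in range */
	printf("%d\n", in_range(10, 20, 15, true));	/* 0: inverted */
	printf("%d\n", in_range(10, 20, 25, true));	/* 1: out of range, inverted */
	return 0;
}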
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
new file mode 100644
index 000000000..0c9e014e3
--- /dev/null
+++ b/net/netfilter/xt_iprange.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_iprange - Netfilter module to match IP address ranges
+ *
+ * (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org>
+ * (C) CC Computer Consultants GmbH, 2008
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_iprange.h>
+
+static bool
+iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_iprange_mtinfo *info = par->matchinfo;
+ const struct iphdr *iph = ip_hdr(skb);
+ bool m;
+
+ if (info->flags & IPRANGE_SRC) {
+ m = ntohl(iph->saddr) < ntohl(info->src_min.ip);
+ m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
+ m ^= !!(info->flags & IPRANGE_SRC_INV);
+ if (m) {
+ pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n",
+ &iph->saddr,
+ (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+ &info->src_min.ip,
+ &info->src_max.ip);
+ return false;
+ }
+ }
+ if (info->flags & IPRANGE_DST) {
+ m = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
+ m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
+ m ^= !!(info->flags & IPRANGE_DST_INV);
+ if (m) {
+ pr_debug("dst IP %pI4 NOT in range %s%pI4-%pI4\n",
+ &iph->daddr,
+ (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+ &info->dst_min.ip,
+ &info->dst_max.ip);
+ return false;
+ }
+ }
+ return true;
+}
+
+static inline int
+iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b)
+{
+ unsigned int i;
+
+ for (i = 0; i < 4; ++i) {
+ if (a->s6_addr32[i] != b->s6_addr32[i])
+ return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]);
+ }
+
+ return 0;
+}
+
+static bool
+iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_iprange_mtinfo *info = par->matchinfo;
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool m;
+
+ if (info->flags & IPRANGE_SRC) {
+ m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
+ m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
+ m ^= !!(info->flags & IPRANGE_SRC_INV);
+ if (m) {
+ pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n",
+ &iph->saddr,
+ (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+ &info->src_min.in6,
+ &info->src_max.in6);
+ return false;
+ }
+ }
+ if (info->flags & IPRANGE_DST) {
+ m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
+ m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
+ m ^= !!(info->flags & IPRANGE_DST_INV);
+ if (m) {
+ pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n",
+ &iph->daddr,
+ (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+ &info->dst_min.in6,
+ &info->dst_max.in6);
+ return false;
+ }
+ }
+ return true;
+}
+
+static struct xt_match iprange_mt_reg[] __read_mostly = {
+ {
+ .name = "iprange",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = iprange_mt4,
+ .matchsize = sizeof(struct xt_iprange_mtinfo),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "iprange",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = iprange_mt6,
+ .matchsize = sizeof(struct xt_iprange_mtinfo),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init iprange_mt_init(void)
+{
+ return xt_register_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
+}
+
+static void __exit iprange_mt_exit(void)
+{
+ xt_unregister_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
+}
+
+module_init(iprange_mt_init);
+module_exit(iprange_mt_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: arbitrary IPv4/IPv6 range matching");
+MODULE_ALIAS("ipt_iprange");
+MODULE_ALIAS("ip6t_iprange");
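
iprange_ipv6_lt() above compares two IPv6 addresses as four 32-bit
words, most significant first, converting each word to host order
before comparing. A userspace restatement using only standard socket
headers (ipv6_lt() and the demo addresses are assumptions):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Lexicographic "less than" on 128-bit addresses, word by word. */
static int ipv6_lt(const struct in6_addr *a, const struct in6_addr *b)
{
	uint32_t wa[4], wb[4];
	int i;

	memcpy(wa, a, sizeof(wa));
	memcpy(wb, b, sizeof(wb));
	for (i = 0; i < 4; i++)
		if (wa[i] != wb[i])
			return ntohl(wa[i]) < ntohl(wb[i]);
	return 0;	/* equal addresses are not "less than" */
}

int main(void)
{
	struct in6_addr lo, hi;

	inet_pton(AF_INET6, "2001:db8::1", &lo);
	inet_pton(AF_INET6, "2001:db8::ff", &hi);
	printf("%d %d\n", ipv6_lt(&lo, &hi), ipv6_lt(&hi, &lo)); /* 1 0 */
	return 0;
}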
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
new file mode 100644
index 000000000..253c71cc9
--- /dev/null
+++ b/net/netfilter/xt_ipvs.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_ipvs - kernel module to match IPVS connection properties
+ *
+ * Author: Hannes Eder <heder@google.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#endif
+#include <linux/ip_vs.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ipvs.h>
+#include <net/netfilter/nf_conntrack.h>
+
+#include <net/ip_vs.h>
+
+MODULE_AUTHOR("Hannes Eder <heder@google.com>");
+MODULE_DESCRIPTION("Xtables: match IPVS connection properties");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ipvs");
+MODULE_ALIAS("ip6t_ipvs");
+
+/* borrowed from xt_conntrack */
+static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
+ const union nf_inet_addr *uaddr,
+ const union nf_inet_addr *umask,
+ unsigned int l3proto)
+{
+ if (l3proto == NFPROTO_IPV4)
+ return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
+#ifdef CONFIG_IP_VS_IPV6
+ else if (l3proto == NFPROTO_IPV6)
+ return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+ &uaddr->in6) == 0;
+#endif
+ else
+ return false;
+}
+
+static bool
+ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ipvs_mtinfo *data = par->matchinfo;
+ struct netns_ipvs *ipvs = net_ipvs(xt_net(par));
+ /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
+ const u_int8_t family = xt_family(par);
+ struct ip_vs_iphdr iph;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn *cp;
+ bool match = true;
+
+ if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
+ match = skb->ipvs_property ^
+ !!(data->invert & XT_IPVS_IPVS_PROPERTY);
+ goto out;
+ }
+
+	/* flags other than XT_IPVS_IPVS_PROPERTY are set */
+ if (!skb->ipvs_property) {
+ match = false;
+ goto out;
+ }
+
+ ip_vs_fill_iph_skb(family, skb, true, &iph);
+
+ if (data->bitmask & XT_IPVS_PROTO)
+ if ((iph.protocol == data->l4proto) ^
+ !(data->invert & XT_IPVS_PROTO)) {
+ match = false;
+ goto out;
+ }
+
+ pp = ip_vs_proto_get(iph.protocol);
+ if (unlikely(!pp)) {
+ match = false;
+ goto out;
+ }
+
+ /*
+ * Check if the packet belongs to an existing entry
+ */
+ cp = pp->conn_out_get(ipvs, family, skb, &iph);
+ if (unlikely(cp == NULL)) {
+ match = false;
+ goto out;
+ }
+
+ /*
+	/*
+	 * We found a connection (cp != NULL); make sure to call
+	 * __ip_vs_conn_put() before returning. In our case, jump to
+	 * out_put_cp.
+	 */
+ if (data->bitmask & XT_IPVS_VPORT)
+ if ((cp->vport == data->vport) ^
+ !(data->invert & XT_IPVS_VPORT)) {
+ match = false;
+ goto out_put_cp;
+ }
+
+ if (data->bitmask & XT_IPVS_VPORTCTL)
+ if ((cp->control != NULL &&
+ cp->control->vport == data->vportctl) ^
+ !(data->invert & XT_IPVS_VPORTCTL)) {
+ match = false;
+ goto out_put_cp;
+ }
+
+ if (data->bitmask & XT_IPVS_DIR) {
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct == NULL) {
+ match = false;
+ goto out_put_cp;
+ }
+
+ if ((ctinfo >= IP_CT_IS_REPLY) ^
+ !!(data->invert & XT_IPVS_DIR)) {
+ match = false;
+ goto out_put_cp;
+ }
+ }
+
+ if (data->bitmask & XT_IPVS_METHOD)
+ if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
+ !(data->invert & XT_IPVS_METHOD)) {
+ match = false;
+ goto out_put_cp;
+ }
+
+ if (data->bitmask & XT_IPVS_VADDR) {
+ if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
+ &data->vmask, family) ^
+ !(data->invert & XT_IPVS_VADDR)) {
+ match = false;
+ goto out_put_cp;
+ }
+ }
+
+out_put_cp:
+ __ip_vs_conn_put(cp);
+out:
+ pr_debug("match=%d\n", match);
+ return match;
+}
+
+static int ipvs_mt_check(const struct xt_mtchk_param *par)
+{
+ if (par->family != NFPROTO_IPV4
+#ifdef CONFIG_IP_VS_IPV6
+ && par->family != NFPROTO_IPV6
+#endif
+ ) {
+ pr_info_ratelimited("protocol family %u not supported\n",
+ par->family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match xt_ipvs_mt_reg __read_mostly = {
+ .name = "ipvs",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = ipvs_mt,
+ .checkentry = ipvs_mt_check,
+ .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)),
+ .me = THIS_MODULE,
+};
+
+static int __init ipvs_mt_init(void)
+{
+ return xt_register_match(&xt_ipvs_mt_reg);
+}
+
+static void __exit ipvs_mt_exit(void)
+{
+ xt_unregister_match(&xt_ipvs_mt_reg);
+}
+
+module_init(ipvs_mt_init);
+module_exit(ipvs_mt_exit);
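
For IPv4, ipvs_mt_addrcmp() above reduces to a masked equality test:
two addresses match when they agree on every bit the mask selects. A
minimal sketch (masked_eq() is an invented name; the values stay in
network byte order, which the test is indifferent to):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static int masked_eq(uint32_t kaddr, uint32_t uaddr, uint32_t umask)
{
	return ((kaddr ^ uaddr) & umask) == 0;
}

int main(void)
{
	uint32_t addr, net, mask;

	inet_pton(AF_INET, "192.0.2.42", &addr);
	inet_pton(AF_INET, "192.0.2.0", &net);
	inet_pton(AF_INET, "255.255.255.0", &mask);
	printf("%d\n", masked_eq(addr, net, mask));	/* 1: same /24 */
	return 0;
}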
diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c
new file mode 100644
index 000000000..a61eb81e9
--- /dev/null
+++ b/net/netfilter/xt_l2tp.c
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match L2TP header parameters. */
+
+/* (C) 2013 James Chapman <jchapman@katalix.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <linux/l2tp.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_l2tp.h>
+
+/* L2TP header masks */
+#define L2TP_HDR_T_BIT 0x8000
+#define L2TP_HDR_L_BIT 0x4000
+#define L2TP_HDR_VER 0x000f
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("Xtables: L2TP header match");
+MODULE_ALIAS("ipt_l2tp");
+MODULE_ALIAS("ip6t_l2tp");
+
+/* The L2TP fields that can be matched */
+struct l2tp_data {
+ u32 tid;
+ u32 sid;
+ u8 type;
+ u8 version;
+};
+
+union l2tp_val {
+ __be16 val16[2];
+ __be32 val32;
+};
+
+static bool l2tp_match(const struct xt_l2tp_info *info, struct l2tp_data *data)
+{
+ if ((info->flags & XT_L2TP_TYPE) && (info->type != data->type))
+ return false;
+
+ if ((info->flags & XT_L2TP_VERSION) && (info->version != data->version))
+ return false;
+
+ /* Check tid only for L2TPv3 control or any L2TPv2 packets */
+ if ((info->flags & XT_L2TP_TID) &&
+ ((data->type == XT_L2TP_TYPE_CONTROL) || (data->version == 2)) &&
+ (info->tid != data->tid))
+ return false;
+
+ /* Check sid only for L2TP data packets */
+ if ((info->flags & XT_L2TP_SID) && (data->type == XT_L2TP_TYPE_DATA) &&
+ (info->sid != data->sid))
+ return false;
+
+ return true;
+}
+
+/* Parse L2TP header fields when UDP encapsulation is used. Handles
+ * L2TPv2 and L2TPv3. Note the L2TPv3 control and data packets have a
+ * different format. See
+ * RFC2661, Section 3.1, L2TPv2 Header Format
+ * RFC3931, Section 3.2.1, L2TPv3 Control Message Header
+ * RFC3931, Section 3.2.2, L2TPv3 Data Message Header
+ * RFC3931, Section 4.1.2.1, L2TPv3 Session Header over UDP
+ */
+static bool l2tp_udp_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff)
+{
+ const struct xt_l2tp_info *info = par->matchinfo;
+ int uhlen = sizeof(struct udphdr);
+ int offs = thoff + uhlen;
+ union l2tp_val *lh;
+ union l2tp_val lhbuf;
+ u16 flags;
+ struct l2tp_data data = { 0, };
+
+ if (par->fragoff != 0)
+ return false;
+
+ /* Extract L2TP header fields. The flags in the first 16 bits
+ * tell us where the other fields are.
+ */
+ lh = skb_header_pointer(skb, offs, 2, &lhbuf);
+ if (lh == NULL)
+ return false;
+
+ flags = ntohs(lh->val16[0]);
+ if (flags & L2TP_HDR_T_BIT)
+ data.type = XT_L2TP_TYPE_CONTROL;
+ else
+ data.type = XT_L2TP_TYPE_DATA;
+ data.version = (u8) flags & L2TP_HDR_VER;
+
+ /* Now extract the L2TP tid/sid. These are in different places
+ * for L2TPv2 (rfc2661) and L2TPv3 (rfc3931). For L2TPv2, we
+ * must also check to see if the length field is present,
+ * since this affects the offsets into the packet of the
+ * tid/sid fields.
+ */
+ if (data.version == 3) {
+ lh = skb_header_pointer(skb, offs + 4, 4, &lhbuf);
+ if (lh == NULL)
+ return false;
+ if (data.type == XT_L2TP_TYPE_CONTROL)
+ data.tid = ntohl(lh->val32);
+ else
+ data.sid = ntohl(lh->val32);
+ } else if (data.version == 2) {
+ if (flags & L2TP_HDR_L_BIT)
+ offs += 2;
+ lh = skb_header_pointer(skb, offs + 2, 4, &lhbuf);
+ if (lh == NULL)
+ return false;
+ data.tid = (u32) ntohs(lh->val16[0]);
+ data.sid = (u32) ntohs(lh->val16[1]);
+ } else
+ return false;
+
+ return l2tp_match(info, &data);
+}
+
+/* Parse L2TP header fields for IP encapsulation (no UDP header).
+ * L2TPv3 data packets have a different format with IP encap. See
+ * RFC3931, Section 4.1.1.1, L2TPv3 Session Header over IP.
+ * RFC3931, Section 4.1.1.2, L2TPv3 Control and Data Traffic over IP.
+ */
+static bool l2tp_ip_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff)
+{
+ const struct xt_l2tp_info *info = par->matchinfo;
+ union l2tp_val *lh;
+ union l2tp_val lhbuf;
+ struct l2tp_data data = { 0, };
+
+ /* For IP encap, the L2TP sid is the first 32-bits. */
+ lh = skb_header_pointer(skb, thoff, sizeof(lhbuf), &lhbuf);
+ if (lh == NULL)
+ return false;
+ if (lh->val32 == 0) {
+ /* Must be a control packet. The L2TP tid is further
+ * into the packet.
+ */
+ data.type = XT_L2TP_TYPE_CONTROL;
+ lh = skb_header_pointer(skb, thoff + 8, sizeof(lhbuf),
+ &lhbuf);
+ if (lh == NULL)
+ return false;
+ data.tid = ntohl(lh->val32);
+ } else {
+ data.sid = ntohl(lh->val32);
+ data.type = XT_L2TP_TYPE_DATA;
+ }
+
+ data.version = 3;
+
+ return l2tp_match(info, &data);
+}
+
+static bool l2tp_mt4(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ u8 ipproto = iph->protocol;
+
+ /* l2tp_mt_check4 already restricts the transport protocol */
+ switch (ipproto) {
+ case IPPROTO_UDP:
+ return l2tp_udp_mt(skb, par, par->thoff);
+ case IPPROTO_L2TP:
+ return l2tp_ip_mt(skb, par, par->thoff);
+ }
+
+ return false;
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static bool l2tp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ unsigned int thoff = 0;
+ unsigned short fragoff = 0;
+ int ipproto;
+
+ ipproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL);
+ if (fragoff != 0)
+ return false;
+
+ /* l2tp_mt_check6 already restricts the transport protocol */
+ switch (ipproto) {
+ case IPPROTO_UDP:
+ return l2tp_udp_mt(skb, par, thoff);
+ case IPPROTO_L2TP:
+ return l2tp_ip_mt(skb, par, thoff);
+ }
+
+ return false;
+}
+#endif
+
+static int l2tp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_l2tp_info *info = par->matchinfo;
+
+ /* Check for invalid flags */
+ if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION |
+ XT_L2TP_TYPE)) {
+ pr_info_ratelimited("unknown flags: %x\n", info->flags);
+ return -EINVAL;
+ }
+
+ /* At least one of tid, sid or type=control must be specified */
+ if ((!(info->flags & XT_L2TP_TID)) &&
+ (!(info->flags & XT_L2TP_SID)) &&
+ ((!(info->flags & XT_L2TP_TYPE)) ||
+ (info->type != XT_L2TP_TYPE_CONTROL))) {
+ pr_info_ratelimited("invalid flags combination: %x\n",
+ info->flags);
+ return -EINVAL;
+ }
+
+ /* If version 2 is specified, check that incompatible params
+ * are not supplied
+ */
+ if (info->flags & XT_L2TP_VERSION) {
+ if ((info->version < 2) || (info->version > 3)) {
+ pr_info_ratelimited("wrong L2TP version: %u\n",
+ info->version);
+ return -EINVAL;
+ }
+
+ if (info->version == 2) {
+ if ((info->flags & XT_L2TP_TID) &&
+ (info->tid > 0xffff)) {
+ pr_info_ratelimited("v2 tid > 0xffff: %u\n",
+ info->tid);
+ return -EINVAL;
+ }
+ if ((info->flags & XT_L2TP_SID) &&
+ (info->sid > 0xffff)) {
+ pr_info_ratelimited("v2 sid > 0xffff: %u\n",
+ info->sid);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int l2tp_mt_check4(const struct xt_mtchk_param *par)
+{
+ const struct xt_l2tp_info *info = par->matchinfo;
+ const struct ipt_entry *e = par->entryinfo;
+ const struct ipt_ip *ip = &e->ip;
+ int ret;
+
+ ret = l2tp_mt_check(par);
+ if (ret != 0)
+ return ret;
+
+ if ((ip->proto != IPPROTO_UDP) &&
+ (ip->proto != IPPROTO_L2TP)) {
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
+ return -EINVAL;
+ }
+
+ if ((ip->proto == IPPROTO_L2TP) &&
+ (info->version == 2)) {
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static int l2tp_mt_check6(const struct xt_mtchk_param *par)
+{
+ const struct xt_l2tp_info *info = par->matchinfo;
+ const struct ip6t_entry *e = par->entryinfo;
+ const struct ip6t_ip6 *ip = &e->ipv6;
+ int ret;
+
+ ret = l2tp_mt_check(par);
+ if (ret != 0)
+ return ret;
+
+ if ((ip->proto != IPPROTO_UDP) &&
+ (ip->proto != IPPROTO_L2TP)) {
+ pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n");
+ return -EINVAL;
+ }
+
+ if ((ip->proto == IPPROTO_L2TP) &&
+ (info->version == 2)) {
+ pr_info_ratelimited("v2 doesn't support IP mode\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+static struct xt_match l2tp_mt_reg[] __read_mostly = {
+ {
+ .name = "l2tp",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = l2tp_mt4,
+ .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)),
+ .checkentry = l2tp_mt_check4,
+ .hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD)),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "l2tp",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .match = l2tp_mt6,
+ .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)),
+ .checkentry = l2tp_mt_check6,
+ .hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD)),
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init l2tp_mt_init(void)
+{
+ return xt_register_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg));
+}
+
+static void __exit l2tp_mt_exit(void)
+{
+ xt_unregister_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg));
+}
+
+module_init(l2tp_mt_init);
+module_exit(l2tp_mt_exit);
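
All of the offset arithmetic in l2tp_udp_mt() above keys off the first
16 bits of the L2TP header: the T bit separates control from data and
the low nibble carries the version. A small decoder sketch (the flags
word is a fabricated example value):

#include <stdint.h>
#include <stdio.h>

#define T_BIT	0x8000	/* same mask as L2TP_HDR_T_BIT above */
#define VER	0x000f	/* same mask as L2TP_HDR_VER above */

int main(void)
{
	uint16_t flags = 0xc802;	/* T=1, L=1, version 2 */

	printf("type=%s version=%u\n",
	       (flags & T_BIT) ? "control" : "data",
	       (unsigned)(flags & VER));	/* type=control version=2 */
	return 0;
}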
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
new file mode 100644
index 000000000..ca730cedb
--- /dev/null
+++ b/net/netfilter/xt_length.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match packet length. */
+/* (C) 1999-2001 James Morris <jmorris@intercode.com.au>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+
+#include <linux/netfilter/xt_length.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Xtables: Packet length (Layer3,4,5) match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_length");
+MODULE_ALIAS("ip6t_length");
+
+static bool
+length_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_length_info *info = par->matchinfo;
+ u32 pktlen = skb_ip_totlen(skb);
+
+ return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static bool
+length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_length_info *info = par->matchinfo;
+ u32 pktlen = skb->len;
+
+ return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static struct xt_match length_mt_reg[] __read_mostly = {
+ {
+ .name = "length",
+ .family = NFPROTO_IPV4,
+ .match = length_mt,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "length",
+ .family = NFPROTO_IPV6,
+ .match = length_mt6,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init length_mt_init(void)
+{
+ return xt_register_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
+}
+
+static void __exit length_mt_exit(void)
+{
+ xt_unregister_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
+}
+
+module_init(length_mt_init);
+module_exit(length_mt_exit);
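
length_mt6() above compares against skb->len, which for a well-formed,
non-jumbo IPv6 packet equals the fixed 40-byte header plus the header's
payload length (extension headers count toward the payload figure). A
sketch of that arithmetic with the standard struct ip6_hdr:

#include <arpa/inet.h>
#include <netinet/ip6.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct ip6_hdr h;

	memset(&h, 0, sizeof(h));
	h.ip6_plen = htons(1400);	/* payload incl. extension headers */
	printf("matched length: %u\n",
	       (unsigned)(sizeof(h) + ntohs(h.ip6_plen)));	/* 1440 */
	return 0;
}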
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
new file mode 100644
index 000000000..8b4fd2785
--- /dev/null
+++ b/net/netfilter/xt_limit.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
+ * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_limit.h>
+
+struct xt_limit_priv {
+ unsigned long prev;
+ u32 credit;
+};
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+MODULE_DESCRIPTION("Xtables: rate-limit match");
+MODULE_ALIAS("ipt_limit");
+MODULE_ALIAS("ip6t_limit");
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+ this algorithm. The `average rate' in jiffies becomes your initial
+ amount of credit `credit' and the most credit you can ever have
+ `credit_cap'. The `peak rate' becomes the cost of passing the
+ test, `cost'.
+
+ `prev' tracks the last packet hit: you gain one credit per jiffy.
+   If your credit balance would exceed `credit_cap', the extra credit
+   is discarded.  Every time the match passes, you lose `cost' credits;
+ if you don't have that many, the test fails.
+
+ See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+ To get the maximum range, we multiply by this factor (ie. you get N
+ credits per jiffy). We want to allow a rate as low as 1 per day
+ (slowest userspace tool allows), which means
+ CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+static bool
+limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rateinfo *r = par->matchinfo;
+ struct xt_limit_priv *priv = r->master;
+ unsigned long now;
+ u32 old_credit, new_credit, credit_increase = 0;
+ bool ret;
+
+ /* fastpath if there is nothing to update */
+ if ((READ_ONCE(priv->credit) < r->cost) && (READ_ONCE(priv->prev) == jiffies))
+ return false;
+
+ do {
+ now = jiffies;
+ credit_increase += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
+ old_credit = READ_ONCE(priv->credit);
+ new_credit = old_credit;
+ new_credit += credit_increase;
+ if (new_credit > r->credit_cap)
+ new_credit = r->credit_cap;
+ if (new_credit >= r->cost) {
+ ret = true;
+ new_credit -= r->cost;
+ } else {
+ ret = false;
+ }
+ } while (cmpxchg(&priv->credit, old_credit, new_credit) != old_credit);
+
+ return ret;
+}
+
+/* Precision saver. */
+static u32 user2credits(u32 user)
+{
+ /* If multiplying would overflow... */
+ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+ /* Divide first. */
+ return (user / XT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+
+ return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
+}
+
+static int limit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_rateinfo *r = par->matchinfo;
+ struct xt_limit_priv *priv;
+
+ /* Check for overflow. */
+ if (r->burst == 0
+ || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
+ pr_info_ratelimited("Overflow, try lower: %u/%u\n",
+ r->avg, r->burst);
+ return -ERANGE;
+ }
+
+ priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv == NULL)
+ return -ENOMEM;
+
+ /* For SMP, we only want to use one set of state. */
+ r->master = priv;
+ /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
+ 128. */
+ priv->prev = jiffies;
+ priv->credit = user2credits(r->avg * r->burst); /* Credits full. */
+ if (r->cost == 0) {
+ r->credit_cap = priv->credit; /* Credits full. */
+ r->cost = user2credits(r->avg);
+ }
+
+ return 0;
+}
+
+static void limit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_rateinfo *info = par->matchinfo;
+
+ kfree(info->master);
+}
+
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+struct compat_xt_rateinfo {
+ u_int32_t avg;
+ u_int32_t burst;
+
+ compat_ulong_t prev;
+ u_int32_t credit;
+ u_int32_t credit_cap, cost;
+
+ u_int32_t master;
+};
+
+/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
+ * master pointer, which does not need to be preserved. */
+static void limit_mt_compat_from_user(void *dst, const void *src)
+{
+ const struct compat_xt_rateinfo *cm = src;
+ struct xt_rateinfo m = {
+ .avg = cm->avg,
+ .burst = cm->burst,
+ .prev = cm->prev | (unsigned long)cm->master << 32,
+ .credit = cm->credit,
+ .credit_cap = cm->credit_cap,
+ .cost = cm->cost,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+static int limit_mt_compat_to_user(void __user *dst, const void *src)
+{
+ const struct xt_rateinfo *m = src;
+ struct compat_xt_rateinfo cm = {
+ .avg = m->avg,
+ .burst = m->burst,
+ .prev = m->prev,
+ .credit = m->credit,
+ .credit_cap = m->credit_cap,
+ .cost = m->cost,
+ .master = m->prev >> 32,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
+
+static struct xt_match limit_mt_reg __read_mostly = {
+ .name = "limit",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = limit_mt,
+ .checkentry = limit_mt_check,
+ .destroy = limit_mt_destroy,
+ .matchsize = sizeof(struct xt_rateinfo),
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
+ .compatsize = sizeof(struct compat_xt_rateinfo),
+ .compat_from_user = limit_mt_compat_from_user,
+ .compat_to_user = limit_mt_compat_to_user,
+#endif
+ .usersize = offsetof(struct xt_rateinfo, prev),
+ .me = THIS_MODULE,
+};
+
+static int __init limit_mt_init(void)
+{
+ return xt_register_match(&limit_mt_reg);
+}
+
+static void __exit limit_mt_exit(void)
+{
+ xt_unregister_match(&limit_mt_reg);
+}
+
+module_init(limit_mt_init);
+module_exit(limit_mt_exit);
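
POW2_BELOW32() above looks opaque but is plain bit smearing: OR-ing in
shifted copies fills every bit below the highest set bit, and the final
shift-and-add yields the largest power of two not exceeding the input.
A userspace restatement (the 497 input assumes HZ=100, where MAX_CPJ
works out to 497):

#include <stdint.h>
#include <stdio.h>

static uint32_t pow2_below(uint32_t x)
{
	x |= x >> 1;	/* smear the top bit downward... */
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;	/* ...until every lower bit is set */
	return (x >> 1) + 1;
}

int main(void)
{
	printf("%u\n", (unsigned)pow2_below(497));	/* 256 */
	printf("%u\n", (unsigned)pow2_below(256));	/* 256: exact powers map to themselves */
	return 0;
}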
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
new file mode 100644
index 000000000..81649da57
--- /dev/null
+++ b/net/netfilter/xt_mac.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match MAC address parameters. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_mac.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: MAC address match");
+MODULE_ALIAS("ipt_mac");
+MODULE_ALIAS("ip6t_mac");
+
+static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_mac_info *info = par->matchinfo;
+ bool ret;
+
+ if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER)
+ return false;
+ if (skb_mac_header(skb) < skb->head)
+ return false;
+ if (skb_mac_header(skb) + ETH_HLEN > skb->data)
+ return false;
+ ret = ether_addr_equal(eth_hdr(skb)->h_source, info->srcaddr);
+ ret ^= info->invert;
+ return ret;
+}
+
+static struct xt_match mac_mt_reg __read_mostly = {
+ .name = "mac",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = mac_mt,
+ .matchsize = sizeof(struct xt_mac_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD),
+ .me = THIS_MODULE,
+};
+
+static int __init mac_mt_init(void)
+{
+ return xt_register_match(&mac_mt_reg);
+}
+
+static void __exit mac_mt_exit(void)
+{
+ xt_unregister_match(&mac_mt_reg);
+}
+
+module_init(mac_mt_init);
+module_exit(mac_mt_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
new file mode 100644
index 000000000..1ad74b592
--- /dev/null
+++ b/net/netfilter/xt_mark.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_mark - Netfilter module to match NFMARK value
+ *
+ * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@medozas.de>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/xt_mark.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: packet mark operations");
+MODULE_ALIAS("ipt_mark");
+MODULE_ALIAS("ip6t_mark");
+MODULE_ALIAS("ipt_MARK");
+MODULE_ALIAS("ip6t_MARK");
+MODULE_ALIAS("arpt_MARK");
+
+static unsigned int
+mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_mark_tginfo2 *info = par->targinfo;
+
+ skb->mark = (skb->mark & ~info->mask) ^ info->mark;
+ return XT_CONTINUE;
+}
+
+static bool
+mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_mark_mtinfo1 *info = par->matchinfo;
+
+ return ((skb->mark & info->mask) == info->mark) ^ info->invert;
+}
+
+static struct xt_target mark_tg_reg __read_mostly = {
+ .name = "MARK",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .target = mark_tg,
+ .targetsize = sizeof(struct xt_mark_tginfo2),
+ .me = THIS_MODULE,
+};
+
+static struct xt_match mark_mt_reg __read_mostly = {
+ .name = "mark",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .match = mark_mt,
+ .matchsize = sizeof(struct xt_mark_mtinfo1),
+ .me = THIS_MODULE,
+};
+
+static int __init mark_mt_init(void)
+{
+ int ret;
+
+ ret = xt_register_target(&mark_tg_reg);
+ if (ret < 0)
+ return ret;
+ ret = xt_register_match(&mark_mt_reg);
+ if (ret < 0) {
+ xt_unregister_target(&mark_tg_reg);
+ return ret;
+ }
+ return 0;
+}
+
+static void __exit mark_mt_exit(void)
+{
+ xt_unregister_match(&mark_mt_reg);
+ xt_unregister_target(&mark_tg_reg);
+}
+
+module_init(mark_mt_init);
+module_exit(mark_mt_exit);
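
mark_tg()'s update rule above, (mark & ~mask) ^ value, clears only the
masked bits and XORs the new value in, so bits outside the mask survive.
Worked through with sample numbers (all values invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mark  = 0xaa00beef;	/* upper bits must be preserved */
	uint32_t mask  = 0x0000ffff;	/* only touch the low 16 bits */
	uint32_t value = 0x00001234;

	mark = (mark & ~mask) ^ value;
	printf("0x%08x\n", mark);	/* 0xaa001234 */
	return 0;
}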
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
new file mode 100644
index 000000000..44a00f5ac
--- /dev/null
+++ b/net/netfilter/xt_multiport.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports:
+   the port fields sit at the same offset in each header, so they can be
+   treated alike. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+
+#include <linux/netfilter/xt_multiport.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP");
+MODULE_ALIAS("ipt_multiport");
+MODULE_ALIAS("ip6t_multiport");
+
+/* Returns 1 if the port is matched by the test, 0 otherwise. */
+static inline bool
+ports_match_v1(const struct xt_multiport_v1 *minfo,
+ u_int16_t src, u_int16_t dst)
+{
+ unsigned int i;
+ u_int16_t s, e;
+
+ for (i = 0; i < minfo->count; i++) {
+ s = minfo->ports[i];
+
+ if (minfo->pflags[i]) {
+ /* range port matching */
+ e = minfo->ports[++i];
+ pr_debug("src or dst matches with %d-%d?\n", s, e);
+
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ if (src >= s && src <= e)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_DESTINATION:
+ if (dst >= s && dst <= e)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_EITHER:
+ if ((dst >= s && dst <= e) ||
+ (src >= s && src <= e))
+ return true ^ minfo->invert;
+ break;
+ default:
+ break;
+ }
+ } else {
+ /* exact port matching */
+ pr_debug("src or dst matches with %d?\n", s);
+
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ if (src == s)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_DESTINATION:
+ if (dst == s)
+ return true ^ minfo->invert;
+ break;
+ case XT_MULTIPORT_EITHER:
+ if (src == s || dst == s)
+ return true ^ minfo->invert;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return minfo->invert;
+}
+
+static bool
+multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const __be16 *pptr;
+ __be16 _ports[2];
+ const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+
+ if (par->fragoff != 0)
+ return false;
+
+ pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
+ if (pptr == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ pr_debug("Dropping evil offset=0 tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1]));
+}
+
+static inline bool
+check(u_int16_t proto,
+ u_int8_t ip_invflags,
+ u_int8_t match_flags,
+ u_int8_t count)
+{
+ /* Must specify supported protocol, no unknown flags or bad count */
+ return (proto == IPPROTO_TCP || proto == IPPROTO_UDP
+ || proto == IPPROTO_UDPLITE
+ || proto == IPPROTO_SCTP || proto == IPPROTO_DCCP)
+ && !(ip_invflags & XT_INV_PROTO)
+ && (match_flags == XT_MULTIPORT_SOURCE
+ || match_flags == XT_MULTIPORT_DESTINATION
+ || match_flags == XT_MULTIPORT_EITHER)
+ && count <= XT_MULTI_PORTS;
+}
+
+static int multiport_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct ipt_ip *ip = par->entryinfo;
+ const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count) ? 0 : -EINVAL;
+}
+
+static int multiport_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_ip6 *ip = par->entryinfo;
+ const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count) ? 0 : -EINVAL;
+}
+
+static struct xt_match multiport_mt_reg[] __read_mostly = {
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV4,
+ .revision = 1,
+ .checkentry = multiport_mt_check,
+ .match = multiport_mt,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .checkentry = multiport_mt6_check,
+ .match = multiport_mt,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init multiport_mt_init(void)
+{
+ return xt_register_matches(multiport_mt_reg,
+ ARRAY_SIZE(multiport_mt_reg));
+}
+
+static void __exit multiport_mt_exit(void)
+{
+ xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg));
+}
+
+module_init(multiport_mt_init);
+module_exit(multiport_mt_exit);
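
The ports[]/pflags[] encoding that ports_match_v1() above walks packs
ranges inline: when pflags[i] is set, ports[i] and ports[i+1] bound a
range and the loop index skips ahead one slot. A simplified sketch
(destination-only, no inversion; dst_port_listed() is a made-up helper):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool dst_port_listed(const uint16_t *ports, const uint8_t *pflags,
			    unsigned int count, uint16_t dst)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		if (pflags[i]) {
			uint16_t s = ports[i];
			uint16_t e = ports[++i];	/* range end */

			if (dst >= s && dst <= e)
				return true;
		} else if (dst == ports[i]) {		/* exact port */
			return true;
		}
	}
	return false;
}

int main(void)
{
	uint16_t ports[] = { 80, 8000, 8100 };	/* "80,8000:8100" */
	uint8_t pflags[] = { 0, 1, 0 };

	printf("%d %d %d\n",
	       dst_port_listed(ports, pflags, 3, 80),	/* 1 */
	       dst_port_listed(ports, pflags, 3, 8050),	/* 1 */
	       dst_port_listed(ports, pflags, 3, 443));	/* 0 */
	return 0;
}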
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
new file mode 100644
index 000000000..b4f7bbc3f
--- /dev/null
+++ b/net/netfilter/xt_nat.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+
+static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->rangesize != 1) {
+ pr_info_ratelimited("multiple ranges no longer supported\n");
+ return -EINVAL;
+ }
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static int xt_nat_checkentry(const struct xt_tgchk_param *par)
+{
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void xt_nat_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static void xt_nat_convert_range(struct nf_nat_range2 *dst,
+ const struct nf_nat_ipv4_range *src)
+{
+ memset(&dst->min_addr, 0, sizeof(dst->min_addr));
+ memset(&dst->max_addr, 0, sizeof(dst->max_addr));
+ memset(&dst->base_proto, 0, sizeof(dst->base_proto));
+
+ dst->flags = src->flags;
+ dst->min_addr.ip = src->min_ip;
+ dst->max_addr.ip = src->max_ip;
+ dst->min_proto = src->min;
+ dst->max_proto = src->max;
+}
+
+static unsigned int
+xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range2 range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ xt_nat_convert_range(&range, &mr->range[0]);
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range2 range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
+
+ xt_nat_convert_range(&range, &mr->range[0]);
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range_v1 = par->targinfo;
+ struct nf_nat_range2 range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ memcpy(&range, range_v1, sizeof(*range_v1));
+ memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range_v1 = par->targinfo;
+ struct nf_nat_range2 range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
+
+ memcpy(&range, range_v1, sizeof(*range_v1));
+ memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
+
+ return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
+}
+
+static struct xt_target xt_nat_target_reg[] __read_mostly = {
+ {
+ .name = "SNAT",
+ .revision = 0,
+ .checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
+ .target = xt_snat_target_v0,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .family = NFPROTO_IPV4,
+ .table = "nat",
+ .hooks = (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNAT",
+ .revision = 0,
+ .checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
+ .target = xt_dnat_target_v0,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .family = NFPROTO_IPV4,
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SNAT",
+ .revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
+ .target = xt_snat_target_v1,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNAT",
+ .revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
+ .target = xt_dnat_target_v1,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SNAT",
+ .revision = 2,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
+ .target = xt_snat_target_v2,
+ .targetsize = sizeof(struct nf_nat_range2),
+ .table = "nat",
+ .hooks = (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNAT",
+ .revision = 2,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
+ .target = xt_dnat_target_v2,
+ .targetsize = sizeof(struct nf_nat_range2),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init xt_nat_init(void)
+{
+ return xt_register_targets(xt_nat_target_reg,
+ ARRAY_SIZE(xt_nat_target_reg));
+}
+
+static void __exit xt_nat_exit(void)
+{
+ xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg));
+}
+
+module_init(xt_nat_init);
+module_exit(xt_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ipt_SNAT");
+MODULE_ALIAS("ipt_DNAT");
+MODULE_ALIAS("ip6t_SNAT");
+MODULE_ALIAS("ip6t_DNAT");
+MODULE_DESCRIPTION("SNAT and DNAT targets support");
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
new file mode 100644
index 000000000..7c6bf1c16
--- /dev/null
+++ b/net/netfilter/xt_nfacct.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/xt_nfacct.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_nfacct");
+MODULE_ALIAS("ip6t_nfacct");
+
+static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ int overquota;
+ const struct xt_nfacct_match_info *info = par->targinfo;
+
+ nfnl_acct_update(skb, info->nfacct);
+
+ overquota = nfnl_acct_overquota(xt_net(par), info->nfacct);
+
+ return overquota != NFACCT_UNDERQUOTA;
+}
+
+static int
+nfacct_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_nfacct_match_info *info = par->matchinfo;
+ struct nf_acct *nfacct;
+
+ nfacct = nfnl_acct_find_get(par->net, info->name);
+ if (nfacct == NULL) {
+ pr_info_ratelimited("accounting object `%s' does not exists\n",
+ info->name);
+ return -ENOENT;
+ }
+ info->nfacct = nfacct;
+ return 0;
+}
+
+static void
+nfacct_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_nfacct_match_info *info = par->matchinfo;
+
+ nfnl_acct_put(info->nfacct);
+}
+
+static struct xt_match nfacct_mt_reg[] __read_mostly = {
+ {
+ .name = "nfacct",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfacct_mt_checkentry,
+ .match = nfacct_mt,
+ .destroy = nfacct_mt_destroy,
+ .matchsize = sizeof(struct xt_nfacct_match_info),
+ .usersize = offsetof(struct xt_nfacct_match_info, nfacct),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "nfacct",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfacct_mt_checkentry,
+ .match = nfacct_mt,
+ .destroy = nfacct_mt_destroy,
+ .matchsize = sizeof(struct xt_nfacct_match_info_v1),
+ .usersize = offsetof(struct xt_nfacct_match_info_v1, nfacct),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init nfacct_mt_init(void)
+{
+ return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
+}
+
+static void __exit nfacct_mt_exit(void)
+{
+ xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
+}
+
+module_init(nfacct_mt_init);
+module_exit(nfacct_mt_exit);
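
Note the order of operations in nfacct_mt(): the packet is accounted unconditionally, and the rule then matches whenever the object is *not* strictly under quota — which covers both over-quota objects and plain counters that carry no quota at all. A runnable model of that decision (the names and enum values here are illustrative, not the nfnetlink_acct UAPI):

/* Model of the nfacct match decision: count first, then match unless
 * the object is strictly under its quota. Illustrative names only. */
#include <stdio.h>

enum { UNDERQUOTA, OVERQUOTA, NO_QUOTA };

struct acct_obj { unsigned long long bytes, quota; int has_quota; };

static int quota_state(const struct acct_obj *a)
{
	if (!a->has_quota)
		return NO_QUOTA;
	return a->bytes > a->quota ? OVERQUOTA : UNDERQUOTA;
}

static int nfacct_match(struct acct_obj *a, unsigned int pkt_len)
{
	a->bytes += pkt_len;	/* accounting happens on every packet */
	return quota_state(a) != UNDERQUOTA;
}

int main(void)
{
	struct acct_obj a = { .quota = 1500, .has_quota = 1 };

	printf("%d %d\n", nfacct_match(&a, 1000), nfacct_match(&a, 1000));
	return 0;
}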
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644
index 000000000..dc9485854
--- /dev/null
+++ b/net/netfilter/xt_osf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/capability.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/xt_osf.h>
+
+static bool
+xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
+{
+ return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
+ xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers);
+}
+
+static struct xt_match xt_osf_match = {
+ .name = "osf",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .proto = IPPROTO_TCP,
+ .hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD),
+ .match = xt_osf_match_packet,
+ .matchsize = sizeof(struct xt_osf_info),
+ .me = THIS_MODULE,
+};
+
+static int __init xt_osf_init(void)
+{
+ int err;
+
+ err = xt_register_match(&xt_osf_match);
+ if (err) {
+ pr_err("Failed to register OS fingerprint "
+ "matching module (%d)\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __exit xt_osf_fini(void)
+{
+ xt_unregister_match(&xt_osf_match);
+}
+
+module_init(xt_osf_init);
+module_exit(xt_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS("ipt_osf");
+MODULE_ALIAS("ip6t_osf");
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
new file mode 100644
index 000000000..50332888c
--- /dev/null
+++ b/net/netfilter/xt_owner.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel module to match various things tied to sockets associated with
+ * locally generated outgoing packets.
+ *
+ * (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/cred.h>
+
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_owner.h>
+
+static int owner_check(const struct xt_mtchk_param *par)
+{
+ struct xt_owner_match_info *info = par->matchinfo;
+ struct net *net = par->net;
+
+ if (info->match & ~XT_OWNER_MASK)
+ return -EINVAL;
+
+ /* Only allow the common case where the userns of the writer
+ * matches the userns of the network namespace.
+ */
+ if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
+ (current_user_ns() != net->user_ns))
+ return -EINVAL;
+
+ /* Ensure the uids are valid */
+ if (info->match & XT_OWNER_UID) {
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+ if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+ (info->uid_max < info->uid_min) ||
+ uid_lt(uid_max, uid_min)) {
+ return -EINVAL;
+ }
+ }
+
+ /* Ensure the gids are valid */
+ if (info->match & XT_OWNER_GID) {
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+ if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+ (info->gid_max < info->gid_min) ||
+ gid_lt(gid_max, gid_min)) {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static bool
+owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_owner_match_info *info = par->matchinfo;
+ const struct file *filp;
+ struct sock *sk = skb_to_full_sk(skb);
+ struct net *net = xt_net(par);
+
+ if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
+ return (info->match ^ info->invert) == 0;
+ else if (info->match & info->invert & XT_OWNER_SOCKET)
+ /*
+ * Socket exists but user wanted ! --socket-exists.
+ * (Single ampersands intended.)
+ */
+ return false;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+ if (filp == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
+ return ((info->match ^ info->invert) &
+ (XT_OWNER_UID | XT_OWNER_GID)) == 0;
+ }
+
+ if (info->match & XT_OWNER_UID) {
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+ if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
+ uid_lte(filp->f_cred->fsuid, uid_max)) ^
+ !(info->invert & XT_OWNER_UID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
+ return false;
+ }
+ }
+
+ if (info->match & XT_OWNER_GID) {
+ unsigned int i, match = false;
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+ struct group_info *gi = filp->f_cred->group_info;
+
+ if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max))
+ match = true;
+
+ if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+ for (i = 0; i < gi->ngroups; ++i) {
+ kgid_t group = gi->gid[i];
+
+ if (gid_gte(group, gid_min) &&
+ gid_lte(group, gid_max)) {
+ match = true;
+ break;
+ }
+ }
+ }
+
+ if (match ^ !(info->invert & XT_OWNER_GID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
+ return false;
+ }
+ }
+
+ read_unlock_bh(&sk->sk_callback_lock);
+ return true;
+}
+
+static struct xt_match owner_mt_reg __read_mostly = {
+ .name = "owner",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = owner_check,
+ .match = owner_mt,
+ .matchsize = sizeof(struct xt_owner_match_info),
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+};
+
+static int __init owner_mt_init(void)
+{
+ return xt_register_match(&owner_mt_reg);
+}
+
+static void __exit owner_mt_exit(void)
+{
+ xt_unregister_match(&owner_mt_reg);
+}
+
+module_init(owner_mt_init);
+module_exit(owner_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: socket owner matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
new file mode 100644
index 000000000..343e65f37
--- /dev/null
+++ b/net/netfilter/xt_physdev.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match the bridge physical in and out devices for
+ * IP packets passing through a bridge. */
+
+/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter/x_tables.h>
+#include <uapi/linux/netfilter/xt_physdev.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Xtables: Bridge physical device match");
+MODULE_ALIAS("ipt_physdev");
+MODULE_ALIAS("ip6t_physdev");
+
+
+static bool
+physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_physdev_info *info = par->matchinfo;
+ const struct net_device *physdev;
+ unsigned long ret;
+ const char *indev, *outdev;
+
+ /* Not a bridged IP packet or no info available yet:
+ * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
+ * the destination device will be a bridge. */
+ if (!nf_bridge_info_exists(skb)) {
+ /* Return MATCH if the invert flags of the used options are on */
+ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+ !(info->invert & XT_PHYSDEV_OP_BRIDGED))
+ return false;
+ if ((info->bitmask & XT_PHYSDEV_OP_ISIN) &&
+ !(info->invert & XT_PHYSDEV_OP_ISIN))
+ return false;
+ if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) &&
+ !(info->invert & XT_PHYSDEV_OP_ISOUT))
+ return false;
+ if ((info->bitmask & XT_PHYSDEV_OP_IN) &&
+ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ if ((info->bitmask & XT_PHYSDEV_OP_OUT) &&
+ !(info->invert & XT_PHYSDEV_OP_OUT))
+ return false;
+ return true;
+ }
+
+ physdev = nf_bridge_get_physoutdev(skb);
+ outdev = physdev ? physdev->name : NULL;
+
+ /* This only makes sense in the FORWARD and POSTROUTING chains */
+ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+ (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+ return false;
+
+ physdev = nf_bridge_get_physindev(skb, xt_net(par));
+ indev = physdev ? physdev->name : NULL;
+
+ if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
+ (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+ (info->bitmask & XT_PHYSDEV_OP_ISOUT &&
+ (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+ return false;
+
+ if (!(info->bitmask & XT_PHYSDEV_OP_IN))
+ goto match_outdev;
+
+ if (indev) {
+ ret = ifname_compare_aligned(indev, info->physindev,
+ info->in_mask);
+
+ if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ }
+
+match_outdev:
+ if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
+ return true;
+
+ if (!outdev)
+ return false;
+
+ ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
+
+ return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
+}
+
+static int physdev_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_physdev_info *info = par->matchinfo;
+ static bool brnf_probed __read_mostly;
+
+ if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
+ info->bitmask & ~XT_PHYSDEV_OP_MASK)
+ return -EINVAL;
+ if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) &&
+ (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
+ info->invert & XT_PHYSDEV_OP_BRIDGED) &&
+ par->hook_mask & (1 << NF_INET_LOCAL_OUT)) {
+ pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n");
+ return -EINVAL;
+ }
+
+ if (!brnf_probed) {
+ brnf_probed = true;
+ request_module("br_netfilter");
+ }
+
+ return 0;
+}
+
+static struct xt_match physdev_mt_reg __read_mostly = {
+ .name = "physdev",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = physdev_mt_check,
+ .match = physdev_mt,
+ .matchsize = sizeof(struct xt_physdev_info),
+ .me = THIS_MODULE,
+};
+
+static int __init physdev_mt_init(void)
+{
+ return xt_register_match(&physdev_mt_reg);
+}
+
+static void __exit physdev_mt_exit(void)
+{
+ xt_unregister_match(&physdev_mt_reg);
+}
+
+module_init(physdev_mt_init);
+module_exit(physdev_mt_exit);
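
The early-exit branch in physdev_mt() deserves a close read: when no bridge info is attached to the skb, the rule can only match if every selected sub-option was requested in inverted form. A runnable model of those five checks (the flag values below are illustrative placeholders):

/* Model of physdev's early exit when no bridge info exists: every
 * selected sub-option must be inverted, otherwise no match. */
#include <stdio.h>

#define OP_BRIDGED 0x01
#define OP_ISIN    0x02
#define OP_ISOUT   0x04
#define OP_IN      0x08
#define OP_OUT     0x10

static int match_without_bridge_info(unsigned int bitmask, unsigned int invert)
{
	unsigned int ops[] = { OP_BRIDGED, OP_ISIN, OP_ISOUT, OP_IN, OP_OUT };

	for (unsigned int i = 0; i < sizeof(ops) / sizeof(ops[0]); i++)
		if ((bitmask & ops[i]) && !(invert & ops[i]))
			return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", match_without_bridge_info(OP_ISIN, 0));		/* 0 */
	printf("%d\n", match_without_bridge_info(OP_ISIN, OP_ISIN));	/* 1 */
	return 0;
}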
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
new file mode 100644
index 000000000..f48946aef
--- /dev/null
+++ b/net/netfilter/xt_pkttype.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/netfilter/xt_pkttype.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
+MODULE_DESCRIPTION("Xtables: link layer packet type match");
+MODULE_ALIAS("ipt_pkttype");
+MODULE_ALIAS("ip6t_pkttype");
+
+static bool
+pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_pkttype_info *info = par->matchinfo;
+ u_int8_t type;
+
+ if (skb->pkt_type != PACKET_LOOPBACK)
+ type = skb->pkt_type;
+ else if (xt_family(par) == NFPROTO_IPV4 &&
+ ipv4_is_multicast(ip_hdr(skb)->daddr))
+ type = PACKET_MULTICAST;
+ else if (xt_family(par) == NFPROTO_IPV6)
+ type = PACKET_MULTICAST;
+ else
+ type = PACKET_BROADCAST;
+
+ return (type == info->pkttype) ^ info->invert;
+}
+
+static struct xt_match pkttype_mt_reg __read_mostly = {
+ .name = "pkttype",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = pkttype_mt,
+ .matchsize = sizeof(struct xt_pkttype_info),
+ .me = THIS_MODULE,
+};
+
+static int __init pkttype_mt_init(void)
+{
+ return xt_register_match(&pkttype_mt_reg);
+}
+
+static void __exit pkttype_mt_exit(void)
+{
+ xt_unregister_match(&pkttype_mt_reg);
+}
+
+module_init(pkttype_mt_init);
+module_exit(pkttype_mt_exit);
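
Looped-back packets lose their original link-layer type, so pkttype_mt() reconstructs one: IPv4 multicast destinations and all looped-back IPv6 traffic are treated as PACKET_MULTICAST, and the remaining looped-back IPv4 as PACKET_BROADCAST. A runnable model of that reclassification (the PKT_* values below are placeholders, not the if_packet.h constants):

/* Userspace model of the loopback reclassification in pkttype_mt(). */
#include <stdio.h>

enum { PKT_HOST, PKT_BROADCAST, PKT_MULTICAST, PKT_LOOPBACK };

static int classify(int pkt_type, int is_ipv6, int is_v4_multicast)
{
	if (pkt_type != PKT_LOOPBACK)
		return pkt_type;	/* non-loopback keeps its type */
	if (!is_ipv6 && is_v4_multicast)
		return PKT_MULTICAST;
	if (is_ipv6)
		return PKT_MULTICAST;
	return PKT_BROADCAST;
}

int main(void)
{
	printf("%d\n", classify(PKT_LOOPBACK, 0, 1));	/* 2: multicast */
	printf("%d\n", classify(PKT_LOOPBACK, 0, 0));	/* 1: broadcast */
	return 0;
}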
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
new file mode 100644
index 000000000..cb6e82790
--- /dev/null
+++ b/net/netfilter/xt_policy.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_policy.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: IPsec policy match");
+MODULE_LICENSE("GPL");
+
+static inline bool
+xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
+ const union nf_inet_addr *a2, unsigned short family)
+{
+ switch (family) {
+ case NFPROTO_IPV4:
+ return ((a1->ip ^ a2->ip) & m->ip) == 0;
+ case NFPROTO_IPV6:
+ return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
+ }
+ return false;
+}
+
+static bool
+match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
+ unsigned short family)
+{
+#define MATCH_ADDR(x,y,z) (!e->match.x || \
+ (xt_addr_cmp(&e->x, &e->y, (const union nf_inet_addr *)(z), family) \
+ ^ e->invert.x))
+#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+ return MATCH_ADDR(saddr, smask, &x->props.saddr) &&
+ MATCH_ADDR(daddr, dmask, &x->id.daddr) &&
+ MATCH(proto, x->id.proto) &&
+ MATCH(mode, x->props.mode) &&
+ MATCH(spi, x->id.spi) &&
+ MATCH(reqid, x->props.reqid);
+}
+
+static int
+match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
+ unsigned short family)
+{
+ const struct xt_policy_elem *e;
+ const struct sec_path *sp = skb_sec_path(skb);
+ int strict = info->flags & XT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (sp == NULL)
+ return -1;
+ if (strict && info->len != sp->len)
+ return 0;
+
+ for (i = sp->len - 1; i >= 0; i--) {
+ pos = strict ? i - sp->len + 1 : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(sp->xvec[i], e, family)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? 1 : 0;
+}
+
+static int
+match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
+ unsigned short family)
+{
+ const struct xt_policy_elem *e;
+ const struct dst_entry *dst = skb_dst(skb);
+ int strict = info->flags & XT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (dst->xfrm == NULL)
+ return -1;
+
+ for (i = 0; dst && dst->xfrm;
+ dst = ((struct xfrm_dst *)dst)->child, i++) {
+ pos = strict ? i : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(dst->xfrm, e, family)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? i == info->len : 0;
+}
+
+static bool
+policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_policy_info *info = par->matchinfo;
+ int ret;
+
+ if (info->flags & XT_POLICY_MATCH_IN)
+ ret = match_policy_in(skb, info, xt_family(par));
+ else
+ ret = match_policy_out(skb, info, xt_family(par));
+
+ if (ret < 0)
+ ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
+ else if (info->flags & XT_POLICY_MATCH_NONE)
+ ret = false;
+
+ return ret;
+}
+
+static int policy_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_policy_info *info = par->matchinfo;
+ const char *errmsg = "neither incoming nor outgoing policy selected";
+
+ if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
+ goto err;
+
+ if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
+ errmsg = "output policy not valid in PREROUTING and INPUT";
+ goto err;
+ }
+ if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
+ errmsg = "input policy not valid in POSTROUTING and OUTPUT";
+ goto err;
+ }
+ if (info->len > XT_POLICY_MAX_ELEM) {
+ errmsg = "too many policy elements";
+ goto err;
+ }
+ return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
+}
+
+static struct xt_match policy_mt_reg[] __read_mostly = {
+ {
+ .name = "policy",
+ .family = NFPROTO_IPV4,
+ .checkentry = policy_mt_check,
+ .match = policy_mt,
+ .matchsize = sizeof(struct xt_policy_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "policy",
+ .family = NFPROTO_IPV6,
+ .checkentry = policy_mt_check,
+ .match = policy_mt,
+ .matchsize = sizeof(struct xt_policy_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init policy_mt_init(void)
+{
+ return xt_register_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
+}
+
+static void __exit policy_mt_exit(void)
+{
+ xt_unregister_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
+}
+
+module_init(policy_mt_init);
+module_exit(policy_mt_exit);
+MODULE_ALIAS("ipt_policy");
+MODULE_ALIAS("ip6t_policy");
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
new file mode 100644
index 000000000..4452cc93b
--- /dev/null
+++ b/net/netfilter/xt_quota.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * netfilter module to enforce network quotas
+ *
+ * Sam Johnston <samj@samj.net>
+ */
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota.h>
+#include <linux/module.h>
+
+struct xt_quota_priv {
+ spinlock_t lock;
+ uint64_t quota;
+};
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_DESCRIPTION("Xtables: countdown quota match");
+MODULE_ALIAS("ipt_quota");
+MODULE_ALIAS("ip6t_quota");
+
+static bool
+quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct xt_quota_info *q = (void *)par->matchinfo;
+ struct xt_quota_priv *priv = q->master;
+ bool ret = q->flags & XT_QUOTA_INVERT;
+
+ spin_lock_bh(&priv->lock);
+ if (priv->quota >= skb->len) {
+ priv->quota -= skb->len;
+ ret = !ret;
+ } else {
+ /* we do not allow even small packets from now on */
+ priv->quota = 0;
+ }
+ spin_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
+static int quota_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_quota_info *q = par->matchinfo;
+
+ if (q->flags & ~XT_QUOTA_MASK)
+ return -EINVAL;
+
+ q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
+ if (q->master == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&q->master->lock);
+ q->master->quota = q->quota;
+ return 0;
+}
+
+static void quota_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_quota_info *q = par->matchinfo;
+
+ kfree(q->master);
+}
+
+static struct xt_match quota_mt_reg __read_mostly = {
+ .name = "quota",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = quota_mt,
+ .checkentry = quota_mt_check,
+ .destroy = quota_mt_destroy,
+ .matchsize = sizeof(struct xt_quota_info),
+ .usersize = offsetof(struct xt_quota_info, master),
+ .me = THIS_MODULE,
+};
+
+static int __init quota_mt_init(void)
+{
+ return xt_register_match(&quota_mt_reg);
+}
+
+static void __exit quota_mt_exit(void)
+{
+ xt_unregister_match(&quota_mt_reg);
+}
+
+module_init(quota_mt_init);
+module_exit(quota_mt_exit);
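
quota_mt() counts bytes down from a shared budget; the first packet that no longer fits pins the remainder to zero, so later, smaller packets cannot sneak through, and XT_QUOTA_INVERT flips the verdict. A runnable single-threaded model of the countdown (the kernel takes priv->lock around this because the same rule may fire on several CPUs at once):

/* Model of the quota countdown: deduct while the packet fits, pin to
 * zero on the first overrun. */
#include <stdio.h>

static unsigned long long quota = 3000;

static int quota_match(unsigned int pkt_len, int invert)
{
	int ret = invert;

	if (quota >= pkt_len) {
		quota -= pkt_len;
		ret = !ret;
	} else {
		quota = 0;	/* no smaller packet may sneak through later */
	}
	return ret;
}

int main(void)
{
	printf("%d %d %d\n", quota_match(1500, 0), quota_match(1500, 0),
	       quota_match(100, 0));
	return 0;
}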
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
new file mode 100644
index 000000000..72324bd97
--- /dev/null
+++ b/net/netfilter/xt_rateest.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_rateest.h>
+#include <net/netfilter/xt_rateest.h>
+
+
+static bool
+xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rateest_match_info *info = par->matchinfo;
+ struct gnet_stats_rate_est64 sample = {0};
+ u_int32_t bps1, bps2, pps1, pps2;
+ bool ret = true;
+
+ gen_estimator_read(&info->est1->rate_est, &sample);
+
+ if (info->flags & XT_RATEEST_MATCH_DELTA) {
+ bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0;
+ pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0;
+ } else {
+ bps1 = sample.bps;
+ pps1 = sample.pps;
+ }
+
+ if (info->flags & XT_RATEEST_MATCH_ABS) {
+ bps2 = info->bps2;
+ pps2 = info->pps2;
+ } else {
+ gen_estimator_read(&info->est2->rate_est, &sample);
+
+ if (info->flags & XT_RATEEST_MATCH_DELTA) {
+ bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0;
+ pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0;
+ } else {
+ bps2 = sample.bps;
+ pps2 = sample.pps;
+ }
+ }
+
+ switch (info->mode) {
+ case XT_RATEEST_MATCH_LT:
+ if (info->flags & XT_RATEEST_MATCH_BPS)
+ ret &= bps1 < bps2;
+ if (info->flags & XT_RATEEST_MATCH_PPS)
+ ret &= pps1 < pps2;
+ break;
+ case XT_RATEEST_MATCH_GT:
+ if (info->flags & XT_RATEEST_MATCH_BPS)
+ ret &= bps1 > bps2;
+ if (info->flags & XT_RATEEST_MATCH_PPS)
+ ret &= pps1 > pps2;
+ break;
+ case XT_RATEEST_MATCH_EQ:
+ if (info->flags & XT_RATEEST_MATCH_BPS)
+ ret &= bps1 == bps2;
+ if (info->flags & XT_RATEEST_MATCH_PPS)
+ ret &= pps1 == pps2;
+ break;
+ }
+
+ ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
+ return ret;
+}
+
+static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_rateest_match_info *info = par->matchinfo;
+ struct xt_rateest *est1, *est2;
+ int ret = -EINVAL;
+
+ if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
+ XT_RATEEST_MATCH_REL)) != 1)
+ goto err1;
+
+ if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)))
+ goto err1;
+
+ switch (info->mode) {
+ case XT_RATEEST_MATCH_EQ:
+ case XT_RATEEST_MATCH_LT:
+ case XT_RATEEST_MATCH_GT:
+ break;
+ default:
+ goto err1;
+ }
+
+ ret = -ENOENT;
+ est1 = xt_rateest_lookup(par->net, info->name1);
+ if (!est1)
+ goto err1;
+
+ est2 = NULL;
+ if (info->flags & XT_RATEEST_MATCH_REL) {
+ est2 = xt_rateest_lookup(par->net, info->name2);
+ if (!est2)
+ goto err2;
+ }
+
+ info->est1 = est1;
+ info->est2 = est2;
+ return 0;
+
+err2:
+ xt_rateest_put(par->net, est1);
+err1:
+ return ret;
+}
+
+static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_rateest_match_info *info = par->matchinfo;
+
+ xt_rateest_put(par->net, info->est1);
+ if (info->est2)
+ xt_rateest_put(par->net, info->est2);
+}
+
+static struct xt_match xt_rateest_mt_reg __read_mostly = {
+ .name = "rateest",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = xt_rateest_mt,
+ .checkentry = xt_rateest_mt_checkentry,
+ .destroy = xt_rateest_mt_destroy,
+ .matchsize = sizeof(struct xt_rateest_match_info),
+ .usersize = offsetof(struct xt_rateest_match_info, est1),
+ .me = THIS_MODULE,
+};
+
+static int __init xt_rateest_mt_init(void)
+{
+ return xt_register_match(&xt_rateest_mt_reg);
+}
+
+static void __exit xt_rateest_mt_fini(void)
+{
+ xt_unregister_match(&xt_rateest_mt_reg);
+}
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xtables rate estimator match");
+MODULE_ALIAS("ipt_rateest");
+MODULE_ALIAS("ip6t_rateest");
+module_init(xt_rateest_mt_init);
+module_exit(xt_rateest_mt_fini);
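
In XT_RATEEST_MATCH_DELTA mode the operands are not the raw rates but the shortfall between the configured value and the estimator sample, clamped at zero; the LT/GT/EQ switch then compares those shortfalls. A runnable model of the clamped subtraction:

/* Model of the DELTA computation: configured minus sampled, floored
 * at zero, then compared under LT/GT/EQ. */
#include <stdio.h>

static unsigned int delta(unsigned int configured, unsigned int sampled)
{
	return configured >= sampled ? configured - sampled : 0;
}

int main(void)
{
	printf("%u %u\n", delta(1000000, 400000), delta(1000000, 1200000));
	return 0;
}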
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
new file mode 100644
index 000000000..6df485f44
--- /dev/null
+++ b/net/netfilter/xt_realm.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* IP tables module for matching the routing realm
+ *
+ * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/xt_realm.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Routing realm match");
+MODULE_ALIAS("ipt_realm");
+
+static bool
+realm_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_realm_info *info = par->matchinfo;
+ const struct dst_entry *dst = skb_dst(skb);
+
+ return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
+}
+
+static struct xt_match realm_mt_reg __read_mostly = {
+ .name = "realm",
+ .match = realm_mt,
+ .matchsize = sizeof(struct xt_realm_info),
+ .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
+ .family = NFPROTO_UNSPEC,
+ .me = THIS_MODULE
+};
+
+static int __init realm_mt_init(void)
+{
+ return xt_register_match(&realm_mt_reg);
+}
+
+static void __exit realm_mt_exit(void)
+{
+ xt_unregister_match(&realm_mt_reg);
+}
+
+module_init(realm_mt_init);
+module_exit(realm_mt_exit);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
new file mode 100644
index 000000000..ef93e0d3b
--- /dev/null
+++ b/net/netfilter/xt_recent.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * This is a replacement for the old ipt_recent module, which carried the
+ * following copyright notice:
+ *
+ * Author: Stephen Frost <sfrost@snowman.net>
+ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_recent.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_recent");
+MODULE_ALIAS("ip6t_recent");
+
+static unsigned int ip_list_tot __read_mostly = 100;
+static unsigned int ip_list_hash_size __read_mostly;
+static unsigned int ip_list_perms __read_mostly = 0644;
+static unsigned int ip_list_uid __read_mostly;
+static unsigned int ip_list_gid __read_mostly;
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0644);
+module_param(ip_list_gid, uint, 0644);
+MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
+MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files");
+
+/* retained for backwards compatibility */
+static unsigned int ip_pkt_list_tot __read_mostly;
+module_param(ip_pkt_list_tot, uint, 0400);
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
+
+#define XT_RECENT_MAX_NSTAMPS 256
+
+struct recent_entry {
+ struct list_head list;
+ struct list_head lru_list;
+ union nf_inet_addr addr;
+ u_int16_t family;
+ u_int8_t ttl;
+ u_int8_t index;
+ u_int16_t nstamps;
+ unsigned long stamps[];
+};
+
+struct recent_table {
+ struct list_head list;
+ char name[XT_RECENT_NAME_LEN];
+ union nf_inet_addr mask;
+ unsigned int refcnt;
+ unsigned int entries;
+ u8 nstamps_max_mask;
+ struct list_head lru_list;
+ struct list_head iphash[];
+};
+
+struct recent_net {
+ struct list_head tables;
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *xt_recent;
+#endif
+};
+
+static unsigned int recent_net_id __read_mostly;
+
+static inline struct recent_net *recent_pernet(struct net *net)
+{
+ return net_generic(net, recent_net_id);
+}
+
+static DEFINE_SPINLOCK(recent_lock);
+static DEFINE_MUTEX(recent_mutex);
+
+#ifdef CONFIG_PROC_FS
+static const struct proc_ops recent_mt_proc_ops;
+#endif
+
+static u_int32_t hash_rnd __read_mostly;
+
+static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+{
+ return jhash_1word((__force u32)addr->ip, hash_rnd) &
+ (ip_list_hash_size - 1);
+}
+
+static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
+{
+ return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
+ (ip_list_hash_size - 1);
+}
+
+static struct recent_entry *
+recent_entry_lookup(const struct recent_table *table,
+ const union nf_inet_addr *addrp, u_int16_t family,
+ u_int8_t ttl)
+{
+ struct recent_entry *e;
+ unsigned int h;
+
+ if (family == NFPROTO_IPV4)
+ h = recent_entry_hash4(addrp);
+ else
+ h = recent_entry_hash6(addrp);
+
+ list_for_each_entry(e, &table->iphash[h], list)
+ if (e->family == family &&
+ memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 &&
+ (ttl == e->ttl || ttl == 0 || e->ttl == 0))
+ return e;
+ return NULL;
+}
+
+static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
+{
+ list_del(&e->list);
+ list_del(&e->lru_list);
+ kfree(e);
+ t->entries--;
+}
+
+/*
+ * Drop entries with timestamps older than 'time'.
+ */
+static void recent_entry_reap(struct recent_table *t, unsigned long time,
+ struct recent_entry *working, bool update)
+{
+ struct recent_entry *e;
+
+ /*
+ * The head of the LRU list is always the oldest entry.
+ */
+ e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+
+ /*
+	 * Do not reap the entry that is about to be updated.
+ */
+ if (e == working && update)
+ return;
+
+ /*
+ * The last time stamp is the most recent.
+ */
+ if (time_after(time, e->stamps[e->index-1]))
+ recent_entry_remove(t, e);
+}
+
+static struct recent_entry *
+recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
+ u_int16_t family, u_int8_t ttl)
+{
+ struct recent_entry *e;
+ unsigned int nstamps_max = t->nstamps_max_mask;
+
+ if (t->entries >= ip_list_tot) {
+ e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+ recent_entry_remove(t, e);
+ }
+
+ nstamps_max += 1;
+ e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC);
+ if (e == NULL)
+ return NULL;
+ memcpy(&e->addr, addr, sizeof(e->addr));
+ e->ttl = ttl;
+ e->stamps[0] = jiffies;
+ e->nstamps = 1;
+ e->index = 1;
+ e->family = family;
+ if (family == NFPROTO_IPV4)
+ list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
+ else
+ list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
+ list_add_tail(&e->lru_list, &t->lru_list);
+ t->entries++;
+ return e;
+}
+
+static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
+{
+ e->index &= t->nstamps_max_mask;
+ e->stamps[e->index++] = jiffies;
+ if (e->index > e->nstamps)
+ e->nstamps = e->index;
+ list_move_tail(&e->lru_list, &t->lru_list);
+}
+
+static struct recent_table *recent_table_lookup(struct recent_net *recent_net,
+ const char *name)
+{
+ struct recent_table *t;
+
+ list_for_each_entry(t, &recent_net->tables, list)
+ if (!strcmp(t->name, name))
+ return t;
+ return NULL;
+}
+
+static void recent_table_flush(struct recent_table *t)
+{
+ struct recent_entry *e, *next;
+ unsigned int i;
+
+ for (i = 0; i < ip_list_hash_size; i++)
+ list_for_each_entry_safe(e, next, &t->iphash[i], list)
+ recent_entry_remove(t, e);
+}
+
+static bool
+recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct net *net = xt_net(par);
+ struct recent_net *recent_net = recent_pernet(net);
+ const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
+ struct recent_table *t;
+ struct recent_entry *e;
+ union nf_inet_addr addr = {}, addr_mask;
+ u_int8_t ttl;
+ bool ret = info->invert;
+
+ if (xt_family(par) == NFPROTO_IPV4) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (info->side == XT_RECENT_DEST)
+ addr.ip = iph->daddr;
+ else
+ addr.ip = iph->saddr;
+
+ ttl = iph->ttl;
+ } else {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (info->side == XT_RECENT_DEST)
+ memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6));
+ else
+ memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6));
+
+ ttl = iph->hop_limit;
+ }
+
+ /* use TTL as seen before forwarding */
+ if (xt_out(par) != NULL &&
+ (!skb->sk || !net_eq(net, sock_net(skb->sk))))
+ ttl++;
+
+ spin_lock_bh(&recent_lock);
+ t = recent_table_lookup(recent_net, info->name);
+
+ nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
+
+ e = recent_entry_lookup(t, &addr_mask, xt_family(par),
+ (info->check_set & XT_RECENT_TTL) ? ttl : 0);
+ if (e == NULL) {
+ if (!(info->check_set & XT_RECENT_SET))
+ goto out;
+ e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
+ if (e == NULL)
+ par->hotdrop = true;
+ ret = !ret;
+ goto out;
+ }
+
+ if (info->check_set & XT_RECENT_SET)
+ ret = !ret;
+ else if (info->check_set & XT_RECENT_REMOVE) {
+ recent_entry_remove(t, e);
+ ret = !ret;
+ } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
+ unsigned long time = jiffies - info->seconds * HZ;
+ unsigned int i, hits = 0;
+
+ for (i = 0; i < e->nstamps; i++) {
+ if (info->seconds && time_after(time, e->stamps[i]))
+ continue;
+ if (!info->hit_count || ++hits >= info->hit_count) {
+ ret = !ret;
+ break;
+ }
+ }
+
+ /* info->seconds must be non-zero */
+ if (info->check_set & XT_RECENT_REAP)
+ recent_entry_reap(t, time, e,
+ info->check_set & XT_RECENT_UPDATE && ret);
+ }
+
+ if (info->check_set & XT_RECENT_SET ||
+ (info->check_set & XT_RECENT_UPDATE && ret)) {
+ recent_entry_update(t, e);
+ e->ttl = ttl;
+ }
+out:
+ spin_unlock_bh(&recent_lock);
+ return ret;
+}
+
+static void recent_table_free(void *addr)
+{
+ kvfree(addr);
+}
+
+static int recent_mt_check(const struct xt_mtchk_param *par,
+ const struct xt_recent_mtinfo_v1 *info)
+{
+ struct recent_net *recent_net = recent_pernet(par->net);
+ struct recent_table *t;
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde;
+ kuid_t uid;
+ kgid_t gid;
+#endif
+ unsigned int nstamp_mask;
+ unsigned int i;
+ int ret = -EINVAL;
+
+ net_get_random_once(&hash_rnd, sizeof(hash_rnd));
+
+ if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
+ pr_info_ratelimited("Unsupported userspace flags (%08x)\n",
+ info->check_set);
+ return -EINVAL;
+ }
+ if (hweight8(info->check_set &
+ (XT_RECENT_SET | XT_RECENT_REMOVE |
+ XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
+ return -EINVAL;
+ if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
+ (info->seconds || info->hit_count ||
+ (info->check_set & XT_RECENT_MODIFIERS)))
+ return -EINVAL;
+ if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
+ return -EINVAL;
+ if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) {
+ pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n",
+ info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
+ return -EINVAL;
+ }
+ ret = xt_check_proc_name(info->name, sizeof(info->name));
+ if (ret)
+ return ret;
+
+ if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
+ nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
+ else if (info->hit_count)
+ nstamp_mask = roundup_pow_of_two(info->hit_count) - 1;
+ else
+ nstamp_mask = 32 - 1;
+
+ mutex_lock(&recent_mutex);
+ t = recent_table_lookup(recent_net, info->name);
+ if (t != NULL) {
+ if (nstamp_mask > t->nstamps_max_mask) {
+ spin_lock_bh(&recent_lock);
+ recent_table_flush(t);
+ t->nstamps_max_mask = nstamp_mask;
+ spin_unlock_bh(&recent_lock);
+ }
+
+ t->refcnt++;
+ ret = 0;
+ goto out;
+ }
+
+ t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL);
+ if (t == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ t->refcnt = 1;
+ t->nstamps_max_mask = nstamp_mask;
+
+ memcpy(&t->mask, &info->mask, sizeof(t->mask));
+ strcpy(t->name, info->name);
+ INIT_LIST_HEAD(&t->lru_list);
+ for (i = 0; i < ip_list_hash_size; i++)
+ INIT_LIST_HEAD(&t->iphash[i]);
+#ifdef CONFIG_PROC_FS
+ uid = make_kuid(&init_user_ns, ip_list_uid);
+ gid = make_kgid(&init_user_ns, ip_list_gid);
+ if (!uid_valid(uid) || !gid_valid(gid)) {
+ recent_table_free(t);
+ ret = -EINVAL;
+ goto out;
+ }
+ pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
+ &recent_mt_proc_ops, t);
+ if (pde == NULL) {
+ recent_table_free(t);
+ ret = -ENOMEM;
+ goto out;
+ }
+ proc_set_user(pde, uid, gid);
+#endif
+ spin_lock_bh(&recent_lock);
+ list_add_tail(&t->list, &recent_net->tables);
+ spin_unlock_bh(&recent_lock);
+ ret = 0;
+out:
+ mutex_unlock(&recent_mutex);
+ return ret;
+}
+
+static int recent_mt_check_v0(const struct xt_mtchk_param *par)
+{
+ const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo;
+ struct xt_recent_mtinfo_v1 info_v1;
+
+ /* Copy revision 0 structure to revision 1 */
+ memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo));
+ /* Set default mask to ensure backward compatible behaviour */
+ memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all));
+
+ return recent_mt_check(par, &info_v1);
+}
+
+static int recent_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ return recent_mt_check(par, par->matchinfo);
+}
+
+static void recent_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ struct recent_net *recent_net = recent_pernet(par->net);
+ const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
+ struct recent_table *t;
+
+ mutex_lock(&recent_mutex);
+ t = recent_table_lookup(recent_net, info->name);
+ if (--t->refcnt == 0) {
+ spin_lock_bh(&recent_lock);
+ list_del(&t->list);
+ spin_unlock_bh(&recent_lock);
+#ifdef CONFIG_PROC_FS
+ if (recent_net->xt_recent != NULL)
+ remove_proc_entry(t->name, recent_net->xt_recent);
+#endif
+ recent_table_flush(t);
+ recent_table_free(t);
+ }
+ mutex_unlock(&recent_mutex);
+}
+
+#ifdef CONFIG_PROC_FS
+struct recent_iter_state {
+ const struct recent_table *table;
+ unsigned int bucket;
+};
+
+static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(recent_lock)
+{
+ struct recent_iter_state *st = seq->private;
+ const struct recent_table *t = st->table;
+ struct recent_entry *e;
+ loff_t p = *pos;
+
+ spin_lock_bh(&recent_lock);
+
+ for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
+ list_for_each_entry(e, &t->iphash[st->bucket], list)
+ if (p-- == 0)
+ return e;
+ return NULL;
+}
+
+static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct recent_iter_state *st = seq->private;
+ const struct recent_table *t = st->table;
+ const struct recent_entry *e = v;
+ const struct list_head *head = e->list.next;
+
+ (*pos)++;
+ while (head == &t->iphash[st->bucket]) {
+ if (++st->bucket >= ip_list_hash_size)
+ return NULL;
+ head = t->iphash[st->bucket].next;
+ }
+ return list_entry(head, struct recent_entry, list);
+}
+
+static void recent_seq_stop(struct seq_file *s, void *v)
+ __releases(recent_lock)
+{
+ spin_unlock_bh(&recent_lock);
+}
+
+static int recent_seq_show(struct seq_file *seq, void *v)
+{
+ const struct recent_entry *e = v;
+ struct recent_iter_state *st = seq->private;
+ const struct recent_table *t = st->table;
+ unsigned int i;
+
+ i = (e->index - 1) & t->nstamps_max_mask;
+
+ if (e->family == NFPROTO_IPV4)
+ seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u",
+ &e->addr.ip, e->ttl, e->stamps[i], e->index);
+ else
+ seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u",
+ &e->addr.in6, e->ttl, e->stamps[i], e->index);
+ for (i = 0; i < e->nstamps; i++)
+ seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
+ seq_putc(seq, '\n');
+ return 0;
+}
+
+static const struct seq_operations recent_seq_ops = {
+ .start = recent_seq_start,
+ .next = recent_seq_next,
+ .stop = recent_seq_stop,
+ .show = recent_seq_show,
+};
+
+static int recent_seq_open(struct inode *inode, struct file *file)
+{
+ struct recent_iter_state *st;
+
+ st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
+ if (st == NULL)
+ return -ENOMEM;
+
+ st->table = pde_data(inode);
+ return 0;
+}
+
+static ssize_t
+recent_mt_proc_write(struct file *file, const char __user *input,
+ size_t size, loff_t *loff)
+{
+ struct recent_table *t = pde_data(file_inode(file));
+ struct recent_entry *e;
+ char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")];
+ const char *c = buf;
+ union nf_inet_addr addr = {};
+ u_int16_t family;
+ bool add, succ;
+
+ if (size == 0)
+ return 0;
+ if (size > sizeof(buf))
+ size = sizeof(buf);
+ if (copy_from_user(buf, input, size) != 0)
+ return -EFAULT;
+
+ /* Strict protocol! */
+ if (*loff != 0)
+ return -ESPIPE;
+ switch (*c) {
+ case '/': /* flush table */
+ spin_lock_bh(&recent_lock);
+ recent_table_flush(t);
+ spin_unlock_bh(&recent_lock);
+ return size;
+ case '-': /* remove address */
+ add = false;
+ break;
+ case '+': /* add address */
+ add = true;
+ break;
+ default:
+ pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
+ return -EINVAL;
+ }
+
+ ++c;
+ --size;
+ if (strnchr(c, size, ':') != NULL) {
+ family = NFPROTO_IPV6;
+ succ = in6_pton(c, size, (void *)&addr, '\n', NULL);
+ } else {
+ family = NFPROTO_IPV4;
+ succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
+ }
+
+ if (!succ)
+ return -EINVAL;
+
+ spin_lock_bh(&recent_lock);
+ e = recent_entry_lookup(t, &addr, family, 0);
+ if (e == NULL) {
+ if (add)
+ recent_entry_init(t, &addr, family, 0);
+ } else {
+ if (add)
+ recent_entry_update(t, e);
+ else
+ recent_entry_remove(t, e);
+ }
+ spin_unlock_bh(&recent_lock);
+	/* Account for the command character consumed above */
+ *loff += size + 1;
+ return size + 1;
+}
+
+static const struct proc_ops recent_mt_proc_ops = {
+ .proc_open = recent_seq_open,
+ .proc_read = seq_read,
+ .proc_write = recent_mt_proc_write,
+ .proc_release = seq_release_private,
+ .proc_lseek = seq_lseek,
+};
+
+static int __net_init recent_proc_net_init(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+
+ recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
+ if (!recent_net->xt_recent)
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit recent_proc_net_exit(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+ struct recent_table *t;
+
+ /* recent_net_exit() is called before recent_mt_destroy(). Make sure
+ * that the parent xt_recent proc entry is empty before trying to
+ * remove it.
+ */
+ spin_lock_bh(&recent_lock);
+ list_for_each_entry(t, &recent_net->tables, list)
+ remove_proc_entry(t->name, recent_net->xt_recent);
+
+ recent_net->xt_recent = NULL;
+ spin_unlock_bh(&recent_lock);
+
+ remove_proc_entry("xt_recent", net->proc_net);
+}
+#else
+static inline int recent_proc_net_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void recent_proc_net_exit(struct net *net)
+{
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __net_init recent_net_init(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+
+ INIT_LIST_HEAD(&recent_net->tables);
+ return recent_proc_net_init(net);
+}
+
+static void __net_exit recent_net_exit(struct net *net)
+{
+ recent_proc_net_exit(net);
+}
+
+static struct pernet_operations recent_net_ops = {
+ .init = recent_net_init,
+ .exit = recent_net_exit,
+ .id = &recent_net_id,
+ .size = sizeof(struct recent_net),
+};
+
+static struct xt_match recent_mt_reg[] __read_mostly = {
+ {
+ .name = "recent",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo),
+ .checkentry = recent_mt_check_v0,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "recent",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo),
+ .checkentry = recent_mt_check_v0,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "recent",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo_v1),
+ .checkentry = recent_mt_check_v1,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "recent",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo_v1),
+ .checkentry = recent_mt_check_v1,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ }
+};
+
+static int __init recent_mt_init(void)
+{
+ int err;
+
+ BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS);
+
+ if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS)
+ return -EINVAL;
+ ip_list_hash_size = 1 << fls(ip_list_tot);
+
+ err = register_pernet_subsys(&recent_net_ops);
+ if (err)
+ return err;
+ err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ if (err)
+ unregister_pernet_subsys(&recent_net_ops);
+ return err;
+}
+
+static void __exit recent_mt_exit(void)
+{
+ xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ unregister_pernet_subsys(&recent_net_ops);
+}
+
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
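
Two details above are worth calling out. The proc write handler accepts exactly three commands: '/' flushes the table, '+addr' inserts or updates an entry, '-addr' removes one. And each entry keeps its timestamps in a ring whose size is a power of two, so recent_entry_update() can wrap the write index with nstamps_max_mask. A runnable model of that ring (sizes are illustrative):

/* Model of the recent_entry_update() ring buffer: the index wraps via
 * a power-of-two mask; nstamps tracks how much of the ring is valid. */
#include <stdio.h>

#define MASK 7	/* 8 slots, as with nstamps_max_mask = 2^k - 1 */

struct entry { unsigned long stamps[MASK + 1]; unsigned int index, nstamps; };

static void update(struct entry *e, unsigned long now)
{
	e->index &= MASK;
	e->stamps[e->index++] = now;
	if (e->index > e->nstamps)
		e->nstamps = e->index;
}

int main(void)
{
	struct entry e = { .index = 1, .nstamps = 1 };

	for (unsigned long t = 100; t < 112; t++)
		update(&e, t);
	printf("index=%u nstamps=%u\n", e.index, e.nstamps);
	return 0;
}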
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
new file mode 100644
index 000000000..68ccbe50b
--- /dev/null
+++ b/net/netfilter/xt_repldata.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Today's hack: quantum tunneling in structs
+ *
+ * 'entries' and 'term' are never referenced by name anywhere in the code.
+ * They only serve as hanging-off data accessed through repl.data[].
+ */
+
+/* tbl has the following structure equivalent, but is C99 compliant:
+ * struct {
+ * struct type##_replace repl;
+ * struct type##_standard entries[nhooks];
+ * struct type##_error term;
+ * } *tbl;
+ */
+
+#define xt_alloc_initial_table(type, typ2) ({ \
+ unsigned int hook_mask = info->valid_hooks; \
+ unsigned int nhooks = hweight32(hook_mask); \
+ unsigned int bytes = 0, hooknum = 0, i = 0; \
+ struct { \
+ struct type##_replace repl; \
+ struct type##_standard entries[]; \
+ } *tbl; \
+ struct type##_error *term; \
+ size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \
+ __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \
+ tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \
+ if (tbl == NULL) \
+ return NULL; \
+ term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
+ strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ *term = (struct type##_error)typ2##_ERROR_INIT; \
+ tbl->repl.valid_hooks = hook_mask; \
+ tbl->repl.num_entries = nhooks + 1; \
+ tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
+ sizeof(struct type##_error); \
+ for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
+ if (!(hook_mask & 1)) \
+ continue; \
+ tbl->repl.hook_entry[hooknum] = bytes; \
+ tbl->repl.underflow[hooknum] = bytes; \
+ tbl->entries[i++] = (struct type##_standard) \
+ typ2##_STANDARD_INIT(NF_ACCEPT); \
+ bytes += sizeof(struct type##_standard); \
+ } \
+ tbl; \
+})
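
The interesting line is the term_offset computation: the error terminator is placed after the flexible entries[] array, rounded up to its own alignment. Below is a runnable userspace model of that arithmetic with made-up struct sizes; the real callers, e.g. ip_tables.c expanding xt_alloc_initial_table(ipt, IPT), are outside this diff.

/* Model of the term_offset rounding in xt_alloc_initial_table();
 * struct sizes here are arbitrary stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct repl { unsigned int valid_hooks, num_entries, size; };
struct standard { char pad[40]; };
struct error { long pad[6]; };	/* long gives it non-trivial alignment */

struct tbl { struct repl repl; struct standard entries[]; };

int main(void)
{
	unsigned int nhooks = 3;
	size_t end = offsetof(struct tbl, entries) +
		     nhooks * sizeof(struct standard);
	size_t term_offset = (end + __alignof__(struct error) - 1) &
			     ~(__alignof__(struct error) - 1);

	printf("entries end=%zu term_offset=%zu total=%zu\n",
	       end, term_offset, term_offset + sizeof(struct error));
	return 0;
}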
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
new file mode 100644
index 000000000..d4bf089c9
--- /dev/null
+++ b/net/netfilter/xt_sctp.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/sctp/sctp.h>
+#include <linux/sctp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_sctp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
+MODULE_ALIAS("ipt_sctp");
+MODULE_ALIAS("ip6t_sctp");
+
+#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+ || (!!((invflag) & (option)) ^ (cond)))
+
+static bool
+match_flags(const struct xt_sctp_flag_info *flag_info,
+ const int flag_count,
+ u_int8_t chunktype,
+ u_int8_t chunkflags)
+{
+ int i;
+
+ for (i = 0; i < flag_count; i++)
+ if (flag_info[i].chunktype == chunktype)
+ return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
+
+ return true;
+}
+
+static inline bool
+match_packet(const struct sk_buff *skb,
+ unsigned int offset,
+ const struct xt_sctp_info *info,
+ bool *hotdrop)
+{
+	u_int32_t chunkmapcopy[256 / sizeof(u_int32_t)];
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ int chunk_match_type = info->chunk_match_type;
+ const struct xt_sctp_flag_info *flag_info = info->flag_info;
+ int flag_count = info->flag_count;
+
+#ifdef DEBUG
+ int i = 0;
+#endif
+
+ if (chunk_match_type == SCTP_CHUNK_MATCH_ALL)
+ SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap);
+
+ do {
+ sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
+ if (sch == NULL || sch->length == 0) {
+ pr_debug("Dropping invalid SCTP packet.\n");
+ *hotdrop = true;
+ return false;
+ }
+#ifdef DEBUG
+ pr_debug("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d"
+ "\tflags: %x\n",
+ ++i, offset, sch->type, htons(sch->length),
+ sch->flags);
+#endif
+ offset += SCTP_PAD4(ntohs(sch->length));
+
+ pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
+
+ if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) {
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ANY:
+ if (match_flags(flag_info, flag_count,
+ sch->type, sch->flags)) {
+ return true;
+ }
+ break;
+
+ case SCTP_CHUNK_MATCH_ALL:
+ if (match_flags(flag_info, flag_count,
+ sch->type, sch->flags))
+ SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
+ break;
+
+ case SCTP_CHUNK_MATCH_ONLY:
+ if (!match_flags(flag_info, flag_count,
+ sch->type, sch->flags))
+ return false;
+ break;
+ }
+ } else {
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ONLY:
+ return false;
+ }
+ }
+ } while (offset < skb->len);
+
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ALL:
+ return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy);
+ case SCTP_CHUNK_MATCH_ANY:
+ return false;
+ case SCTP_CHUNK_MATCH_ONLY:
+ return true;
+ }
+
+ /* This will never be reached, but required to stop compiler whine */
+ return false;
+}
+
+static bool
+sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_sctp_info *info = par->matchinfo;
+ const struct sctphdr *sh;
+ struct sctphdr _sh;
+
+ if (par->fragoff != 0) {
+ pr_debug("Dropping non-first fragment.. FIXME\n");
+ return false;
+ }
+
+ sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
+ if (sh == NULL) {
+		pr_debug("Dropping evil SCTP offset=0 tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+ pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
+
+ return SCCHECK(ntohs(sh->source) >= info->spts[0]
+ && ntohs(sh->source) <= info->spts[1],
+ XT_SCTP_SRC_PORTS, info->flags, info->invflags) &&
+ SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+ && ntohs(sh->dest) <= info->dpts[1],
+ XT_SCTP_DEST_PORTS, info->flags, info->invflags) &&
+ SCCHECK(match_packet(skb, par->thoff + sizeof(_sh),
+ info, &par->hotdrop),
+ XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+}
+
+static int sctp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_sctp_info *info = par->matchinfo;
+
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
+ if (info->flags & ~XT_SCTP_VALID_FLAGS)
+ return -EINVAL;
+ if (info->invflags & ~XT_SCTP_VALID_FLAGS)
+ return -EINVAL;
+ if (info->invflags & ~info->flags)
+ return -EINVAL;
+ if (!(info->flags & XT_SCTP_CHUNK_TYPES))
+ return 0;
+ if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL |
+ SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY))
+ return 0;
+ return -EINVAL;
+}
+
+static struct xt_match sctp_mt_reg[] __read_mostly = {
+ {
+ .name = "sctp",
+ .family = NFPROTO_IPV4,
+ .checkentry = sctp_mt_check,
+ .match = sctp_mt,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "sctp",
+ .family = NFPROTO_IPV6,
+ .checkentry = sctp_mt_check,
+ .match = sctp_mt,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
+};
+
+static int __init sctp_mt_init(void)
+{
+ return xt_register_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
+}
+
+static void __exit sctp_mt_exit(void)
+{
+ xt_unregister_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
+}
+
+module_init(sctp_mt_init);
+module_exit(sctp_mt_exit);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
new file mode 100644
index 000000000..731bc2caf
--- /dev/null
+++ b/net/netfilter/xt_set.c
@@ -0,0 +1,712 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ * Patrick Schaaf <bof@bof.de>
+ * Martin Josefsson <gandalf@wlug.westbo.se>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
+ */
+
+/* Kernel module which implements the set match and SET target
+ * for netfilter/iptables.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <uapi/linux/netfilter/xt_set.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: IP set match and target module");
+MODULE_ALIAS("xt_SET");
+MODULE_ALIAS("ipt_set");
+MODULE_ALIAS("ip6t_set");
+MODULE_ALIAS("ipt_SET");
+MODULE_ALIAS("ip6t_SET");
+
+static inline int
+match_set(ip_set_id_t index, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, int inv)
+{
+ if (ip_set_test(index, skb, par, opt))
+ inv = !inv;
+ return inv;
+}
+
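+/* Editor's note: match_set() is an XOR with the invert flag; a sketch
+ * of its truth table:
+ *
+ *	in set?  inv  ->  result
+ *	  yes     0        1
+ *	  yes     1        0
+ *	  no      0        0
+ *	  no      1        1
+ */
+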
+#define ADT_OPT(n, f, d, fs, cfs, t, p, b, po, bo) \
+struct ip_set_adt_opt n = { \
+ .family = f, \
+ .dim = d, \
+ .flags = fs, \
+ .cmdflags = cfs, \
+ .ext.timeout = t, \
+ .ext.packets = p, \
+ .ext.bytes = b, \
+ .ext.packets_op = po, \
+ .ext.bytes_op = bo, \
+}
+
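+/* Editor's note: for reference, the ADT_OPT() invocation in
+ * set_match_v1() below expands roughly to this initializer (a sketch;
+ * the remaining ext.* fields are zero):
+ *
+ *	struct ip_set_adt_opt opt = {
+ *		.family      = xt_family(par),
+ *		.dim         = info->match_set.dim,
+ *		.flags       = info->match_set.flags,
+ *		.cmdflags    = 0,
+ *		.ext.timeout = UINT_MAX,
+ *	};
+ */
+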
+/* Revision 0 interface: backward compatible with netfilter/iptables */
+
+static bool
+set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v0 *info = par->matchinfo;
+
+ ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
+ info->match_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.u.compat.flags & IPSET_INV_MATCH);
+}
+
+static void
+compat_flags(struct xt_set_info_v0 *info)
+{
+ u_int8_t i;
+
+ /* Fill out compatibility data according to enum ip_set_kopt */
+ info->u.compat.dim = IPSET_DIM_ZERO;
+ if (info->u.flags[0] & IPSET_MATCH_INV)
+ info->u.compat.flags |= IPSET_INV_MATCH;
+ for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) {
+ info->u.compat.dim++;
+ if (info->u.flags[i] & IPSET_SRC)
+ info->u.compat.flags |= (1 << info->u.compat.dim);
+ }
+}
+
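+/* Editor's note: a worked example of compat_flags(), assuming a
+ * revision-0 rule with u.flags[0] = IPSET_SRC and u.flags[1] = IPSET_DST
+ * (both nonzero, so the loop runs twice):
+ *
+ *	i = 0: dim = 1, IPSET_SRC set   -> compat.flags |= (1 << 1)
+ *	i = 1: dim = 2, IPSET_SRC unset -> no flag bit
+ *
+ * i.e. the set is tested as "src,dst" with two dimensions.
+ */
+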
+static int
+set_match_v0_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_set_info_match_v0 *info = par->matchinfo;
+ ip_set_id_t index;
+
+ index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
+
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
+ return -ENOENT;
+ }
+ if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
+ pr_info_ratelimited("set match dimension is over the limit!\n");
+ ip_set_nfnl_put(par->net, info->match_set.index);
+ return -ERANGE;
+ }
+
+ /* Fill out compatibility data */
+ compat_flags(&info->match_set);
+
+ return 0;
+}
+
+static void
+set_match_v0_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_set_info_match_v0 *info = par->matchinfo;
+
+ ip_set_nfnl_put(par->net, info->match_set.index);
+}
+
+/* Revision 1 match */
+
+static bool
+set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v1 *info = par->matchinfo;
+
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
+ info->match_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ if (opt.flags & IPSET_RETURN_NOMATCH)
+ opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
+
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
+}
+
+static int
+set_match_v1_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_set_info_match_v1 *info = par->matchinfo;
+ ip_set_id_t index;
+
+ index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
+
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
+ return -ENOENT;
+ }
+ if (info->match_set.dim > IPSET_DIM_MAX) {
+ pr_info_ratelimited("set match dimension is over the limit!\n");
+ ip_set_nfnl_put(par->net, info->match_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_match_v1_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_set_info_match_v1 *info = par->matchinfo;
+
+ ip_set_nfnl_put(par->net, info->match_set.index);
+}
+
+/* Revision 3 match */
+
+static bool
+set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v3 *info = par->matchinfo;
+
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
+
+ if (info->packets.op != IPSET_COUNTER_NONE ||
+ info->bytes.op != IPSET_COUNTER_NONE)
+ opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
+}
+
+#define set_match_v3_checkentry set_match_v1_checkentry
+#define set_match_v3_destroy set_match_v1_destroy
+
+/* Revision 4 match */
+
+static bool
+set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v4 *info = par->matchinfo;
+
+ ADT_OPT(opt, xt_family(par), info->match_set.dim,
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
+
+ if (info->packets.op != IPSET_COUNTER_NONE ||
+ info->bytes.op != IPSET_COUNTER_NONE)
+ opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
+}
+
+#define set_match_v4_checkentry set_match_v1_checkentry
+#define set_match_v4_destroy set_match_v1_destroy
+
+/* Revision 0 interface: backward compatible with netfilter/iptables */
+
+static unsigned int
+set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v0 *info = par->targinfo;
+
+ ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
+ info->add_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+ ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
+ info->del_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+
+ return XT_CONTINUE;
+}
+
+static int
+set_target_v0_checkentry(const struct xt_tgchk_param *par)
+{
+ struct xt_set_info_target_v0 *info = par->targinfo;
+ ip_set_id_t index;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ return -ENOENT;
+ }
+ }
+ if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
+ info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
+ pr_info_ratelimited("SET target dimension over the limit!\n");
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ return -ERANGE;
+ }
+
+ /* Fill out compatibility data */
+ compat_flags(&info->add_set);
+ compat_flags(&info->del_set);
+
+ return 0;
+}
+
+static void
+set_target_v0_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v0 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+}
+
+/* Revision 1 target */
+
+static unsigned int
+set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v1 *info = par->targinfo;
+
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
+ info->add_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+
+ return XT_CONTINUE;
+}
+
+static int
+set_target_v1_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v1 *info = par->targinfo;
+ ip_set_id_t index;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ return -ENOENT;
+ }
+ }
+ if (info->add_set.dim > IPSET_DIM_MAX ||
+ info->del_set.dim > IPSET_DIM_MAX) {
+ pr_info_ratelimited("SET target dimension over the limit!\n");
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_target_v1_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v1 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+}
+
+/* Revision 2 target */
+
+static unsigned int
+set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v2 *info = par->targinfo;
+
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ /* Normalize to fit into jiffies */
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
+ add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+
+ return XT_CONTINUE;
+}
+
+#define set_target_v2_checkentry set_target_v1_checkentry
+#define set_target_v2_destroy set_target_v1_destroy
+
+/* Revision 3 target */
+
+#define MOPT(opt, member) ((opt).ext.skbinfo.member)
+
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ int ret;
+
+ ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
+ ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+ ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
+ info->map_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
+
+ /* Normalize to fit into jiffies */
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
+ add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+ IPSET_FLAG_MAP_SKBPRIO |
+ IPSET_FLAG_MAP_SKBQUEUE);
+ ret = match_set(info->map_set.index, skb, par, &map_opt,
+ info->map_set.flags & IPSET_INV_MATCH);
+ if (!ret)
+ return XT_CONTINUE;
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+			skb->mark = (skb->mark & ~MOPT(map_opt, skbmarkmask)) ^
+				    MOPT(map_opt, skbmark);
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+ skb->priority = MOPT(map_opt, skbprio);
+ if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+ skb->dev &&
+ skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue))
+ skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue));
+ }
+ return XT_CONTINUE;
+}
+
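+/* Editor's note: the skbmark mapping above keeps the bits outside the
+ * mask and XORs in the stored value.  A numeric example with
+ * illustrative values:
+ *
+ *	skb->mark   = 0xff
+ *	skbmarkmask = 0x0f, skbmark = 0x03
+ *	result      = (0xff & ~0x0f) ^ 0x03 = 0xf0 ^ 0x03 = 0xf3
+ */
+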
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ip_set_id_t index;
+ int ret = 0;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ ret = -ENOENT;
+ goto cleanup_add;
+ }
+ }
+
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ if (strncmp(par->table, "mangle", 7)) {
+ pr_info_ratelimited("--map-set only usable from mangle table\n");
+ ret = -EINVAL;
+ goto cleanup_del;
+ }
+ if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+ (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+ (par->hook_mask & ~(1 << NF_INET_FORWARD |
+ 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING))) {
+ pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
+ ret = -EINVAL;
+ goto cleanup_del;
+ }
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->map_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_info_ratelimited("Cannot find map_set index %u as target\n",
+ info->map_set.index);
+ ret = -ENOENT;
+ goto cleanup_del;
+ }
+ }
+
+ if (info->add_set.dim > IPSET_DIM_MAX ||
+ info->del_set.dim > IPSET_DIM_MAX ||
+ info->map_set.dim > IPSET_DIM_MAX) {
+ pr_info_ratelimited("SET target dimension over the limit!\n");
+ ret = -ERANGE;
+ goto cleanup_mark;
+ }
+
+ return 0;
+cleanup_mark:
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+cleanup_del:
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+cleanup_add:
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ return ret;
+}
+
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+}
+
+static struct xt_match set_matches[] __read_mostly = {
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .match = set_match_v0,
+ .matchsize = sizeof(struct xt_set_info_match_v0),
+ .checkentry = set_match_v0_checkentry,
+ .destroy = set_match_v0_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 1,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
+ /* --return-nomatch flag support */
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 2,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 2,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
+ /* counters support: update, match */
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 3,
+ .match = set_match_v3,
+ .matchsize = sizeof(struct xt_set_info_match_v3),
+ .checkentry = set_match_v3_checkentry,
+ .destroy = set_match_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 3,
+ .match = set_match_v3,
+ .matchsize = sizeof(struct xt_set_info_match_v3),
+ .checkentry = set_match_v3_checkentry,
+ .destroy = set_match_v3_destroy,
+ .me = THIS_MODULE
+ },
+ /* new revision for counters support: update, match */
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 4,
+ .match = set_match_v4,
+ .matchsize = sizeof(struct xt_set_info_match_v4),
+ .checkentry = set_match_v4_checkentry,
+ .destroy = set_match_v4_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 4,
+ .match = set_match_v4,
+ .matchsize = sizeof(struct xt_set_info_match_v4),
+ .checkentry = set_match_v4_checkentry,
+ .destroy = set_match_v4_destroy,
+ .me = THIS_MODULE
+ },
+};
+
+static struct xt_target set_targets[] __read_mostly = {
+ {
+ .name = "SET",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v0,
+ .targetsize = sizeof(struct xt_set_info_target_v0),
+ .checkentry = set_target_v0_checkentry,
+ .destroy = set_target_v0_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v1,
+ .targetsize = sizeof(struct xt_set_info_target_v1),
+ .checkentry = set_target_v1_checkentry,
+ .destroy = set_target_v1_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v1,
+ .targetsize = sizeof(struct xt_set_info_target_v1),
+ .checkentry = set_target_v1_checkentry,
+ .destroy = set_target_v1_destroy,
+ .me = THIS_MODULE
+ },
+ /* --timeout and --exist flags support */
+ {
+ .name = "SET",
+ .revision = 2,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v2,
+ .targetsize = sizeof(struct xt_set_info_target_v2),
+ .checkentry = set_target_v2_checkentry,
+ .destroy = set_target_v2_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v2,
+ .targetsize = sizeof(struct xt_set_info_target_v2),
+ .checkentry = set_target_v2_checkentry,
+ .destroy = set_target_v2_destroy,
+ .me = THIS_MODULE
+ },
+ /* --map-set support */
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+};
+
+static int __init xt_set_init(void)
+{
+ int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches));
+
+ if (!ret) {
+ ret = xt_register_targets(set_targets,
+ ARRAY_SIZE(set_targets));
+ if (ret)
+ xt_unregister_matches(set_matches,
+ ARRAY_SIZE(set_matches));
+ }
+ return ret;
+}
+
+static void __exit xt_set_fini(void)
+{
+ xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches));
+ xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets));
+}
+
+module_init(xt_set_init);
+module_exit(xt_set_fini);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
new file mode 100644
index 000000000..76e01f292
--- /dev/null
+++ b/net/netfilter/xt_socket.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+
+#include <net/netfilter/nf_socket.h>
+#include <linux/netfilter/xt_socket.h>
+
+/* "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without an explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple
+ *
+ * - match: if there's a non-zero bound listener (possibly with a
+ * non-local address). We don't accept zero-bound listeners, since
+ * then local services could intercept traffic going through the
+ * box.
+ */
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sk_buff *pskb = (struct sk_buff *)skb;
+ struct sock *sk = skb->sk;
+
+ if (sk && !net_eq(xt_net(par), sock_net(sk)))
+ sk = NULL;
+
+ if (!sk)
+ sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+
+ if (sk) {
+ bool wildcard;
+ bool transparent = true;
+
+ /* Ignore sockets listening on INADDR_ANY,
+ * unless XT_SOCKET_NOWILDCARD is set
+ */
+ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+ sk_fullsock(sk) &&
+ inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* Ignore non-transparent sockets,
+ * if XT_SOCKET_TRANSPARENT is used
+ */
+ if (info->flags & XT_SOCKET_TRANSPARENT)
+ transparent = inet_sk_transparent(sk);
+
+ if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+ transparent && sk_fullsock(sk))
+ pskb->mark = READ_ONCE(sk->sk_mark);
+
+ if (sk != skb->sk)
+ sock_gen_put(sk);
+
+ if (wildcard || !transparent)
+ sk = NULL;
+ }
+
+ return sk != NULL;
+}
+
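+/* Editor's note: once a socket owned by this netns has been found, the
+ * match above reduces to the following decision (sketch):
+ *
+ *	match = sk && !wildcard && transparent
+ *
+ * where wildcard is only possible when XT_SOCKET_NOWILDCARD is unset,
+ * and transparent defaults to true unless XT_SOCKET_TRANSPARENT asks
+ * for an inet_sk_transparent() check.
+ */
+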
+static bool
+socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ static struct xt_socket_mtinfo1 xt_info_v0 = {
+ .flags = 0,
+ };
+
+ return socket_match(skb, par, &xt_info_v0);
+}
+
+static bool
+socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ return socket_match(skb, par, par->matchinfo);
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static bool
+socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sk_buff *pskb = (struct sk_buff *)skb;
+ struct sock *sk = skb->sk;
+
+ if (sk && !net_eq(xt_net(par), sock_net(sk)))
+ sk = NULL;
+
+ if (!sk)
+ sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+
+ if (sk) {
+ bool wildcard;
+ bool transparent = true;
+
+ /* Ignore sockets listening on INADDR_ANY
+ * unless XT_SOCKET_NOWILDCARD is set
+ */
+ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+ sk_fullsock(sk) &&
+ ipv6_addr_any(&sk->sk_v6_rcv_saddr));
+
+ /* Ignore non-transparent sockets,
+ * if XT_SOCKET_TRANSPARENT is used
+ */
+ if (info->flags & XT_SOCKET_TRANSPARENT)
+ transparent = inet_sk_transparent(sk);
+
+ if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+ transparent && sk_fullsock(sk))
+ pskb->mark = READ_ONCE(sk->sk_mark);
+
+ if (sk != skb->sk)
+ sock_gen_put(sk);
+
+ if (wildcard || !transparent)
+ sk = NULL;
+ }
+
+ return sk != NULL;
+}
+#endif
+
+static int socket_mt_enable_defrag(struct net *net, int family)
+{
+ switch (family) {
+ case NFPROTO_IPV4:
+ return nf_defrag_ipv4_enable(net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ case NFPROTO_IPV6:
+ return nf_defrag_ipv6_enable(net);
+#endif
+ }
+ WARN_ONCE(1, "Unknown family %d\n", family);
+ return 0;
+}
+
+static int socket_mt_v1_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ int err;
+
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
+
+ if (info->flags & ~XT_SOCKET_FLAGS_V1) {
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V1);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int socket_mt_v2_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
+ int err;
+
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
+
+ if (info->flags & ~XT_SOCKET_FLAGS_V2) {
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V2);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int socket_mt_v3_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_socket_mtinfo3 *info =
+ (struct xt_socket_mtinfo3 *)par->matchinfo;
+ int err;
+
+ err = socket_mt_enable_defrag(par->net, par->family);
+ if (err)
+ return err;
+ if (info->flags & ~XT_SOCKET_FLAGS_V3) {
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_SOCKET_FLAGS_V3);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void socket_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ if (par->family == NFPROTO_IPV4)
+ nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ else if (par->family == NFPROTO_IPV6)
+ nf_defrag_ipv6_disable(par->net);
+#endif
+}
+
+static struct xt_match socket_mt_reg[] __read_mostly = {
+ {
+ .name = "socket",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt4_v0,
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "socket",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt4_v1_v2_v3,
+ .destroy = socket_mt_destroy,
+ .checkentry = socket_mt_v1_check,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "socket",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = socket_mt6_v1_v2_v3,
+ .checkentry = socket_mt_v1_check,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .destroy = socket_mt_destroy,
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#endif
+ {
+ .name = "socket",
+ .revision = 2,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt4_v1_v2_v3,
+ .checkentry = socket_mt_v2_check,
+ .destroy = socket_mt_destroy,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "socket",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .match = socket_mt6_v1_v2_v3,
+ .checkentry = socket_mt_v2_check,
+ .destroy = socket_mt_destroy,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#endif
+ {
+ .name = "socket",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = socket_mt4_v1_v2_v3,
+ .checkentry = socket_mt_v3_check,
+ .destroy = socket_mt_destroy,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "socket",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = socket_mt6_v1_v2_v3,
+ .checkentry = socket_mt_v3_check,
+ .destroy = socket_mt_destroy,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+static int __init socket_mt_init(void)
+{
+ return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
+}
+
+static void __exit socket_mt_exit(void)
+{
+ xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
+}
+
+module_init(socket_mt_init);
+module_exit(socket_mt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("x_tables socket match module");
+MODULE_ALIAS("ipt_socket");
+MODULE_ALIAS("ip6t_socket");
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
new file mode 100644
index 000000000..bbe07b1be
--- /dev/null
+++ b/net/netfilter/xt_state.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match connection tracking information. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_state.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module");
+MODULE_ALIAS("ipt_state");
+MODULE_ALIAS("ip6t_state");
+
+static bool
+state_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_state_info *sinfo = par->matchinfo;
+ enum ip_conntrack_info ctinfo;
+ unsigned int statebit;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (ct)
+ statebit = XT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ statebit = XT_STATE_UNTRACKED;
+ else
+ statebit = XT_STATE_INVALID;
+
+ return (sinfo->statemask & statebit);
+}
+
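+/* Editor's note: XT_STATE_BIT() (from the uapi xt_state.h) folds each
+ * conntrack state into one bit of sinfo->statemask; reply-direction
+ * states land on the same bit as their original-direction counterpart,
+ * so e.g. a rule built from "--state ESTABLISHED,RELATED" matches both
+ * directions of an established or related flow.
+ */
+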
+static int state_mt_check(const struct xt_mtchk_param *par)
+{
+ int ret;
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0)
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+}
+
+static void state_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
+static struct xt_match state_mt_reg __read_mostly = {
+ .name = "state",
+ .family = NFPROTO_UNSPEC,
+ .checkentry = state_mt_check,
+ .match = state_mt,
+ .destroy = state_mt_destroy,
+ .matchsize = sizeof(struct xt_state_info),
+ .me = THIS_MODULE,
+};
+
+static int __init state_mt_init(void)
+{
+ return xt_register_match(&state_mt_reg);
+}
+
+static void __exit state_mt_exit(void)
+{
+ xt_unregister_match(&state_mt_reg);
+}
+
+module_init(state_mt_init);
+module_exit(state_mt_exit);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
new file mode 100644
index 000000000..b26c1dcfc
--- /dev/null
+++ b/net/netfilter/xt_statistic.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on ipt_random and ipt_nth by Fabrice MARIE <fabrice@netfilter.org>.
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter/xt_statistic.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/module.h>
+
+struct xt_statistic_priv {
+ atomic_t count;
+} ____cacheline_aligned_in_smp;
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
+MODULE_ALIAS("ipt_statistic");
+MODULE_ALIAS("ip6t_statistic");
+
+static bool
+statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_statistic_info *info = par->matchinfo;
+ bool ret = info->flags & XT_STATISTIC_INVERT;
+ int nval, oval;
+
+ switch (info->mode) {
+ case XT_STATISTIC_MODE_RANDOM:
+ if ((get_random_u32() & 0x7FFFFFFF) < info->u.random.probability)
+ ret = !ret;
+ break;
+ case XT_STATISTIC_MODE_NTH:
+ do {
+ oval = atomic_read(&info->master->count);
+ nval = (oval == info->u.nth.every) ? 0 : oval + 1;
+ } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
+ if (nval == 0)
+ ret = !ret;
+ break;
+ }
+
+ return ret;
+}
+
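+/* Editor's note: in random mode the probability is expected to arrive
+ * from userspace scaled to 31 bits, so the comparison above yields
+ * P(match) = probability / 2^31; an illustrative (hypothetical) encoder:
+ *
+ *	#include <stdint.h>
+ *	uint32_t encode_probability(double p)	// p in [0.0, 1.0]
+ *	{
+ *		return (uint32_t)(p * 2147483648.0);	// p * 2^31
+ *	}
+ *
+ * In nth mode the cmpxchg loop wraps the shared counter atomically, so
+ * exactly one packet out of every info->u.nth.every + 1 matches.
+ */
+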
+static int statistic_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_statistic_info *info = par->matchinfo;
+
+ if (info->mode > XT_STATISTIC_MODE_MAX ||
+ info->flags & ~XT_STATISTIC_MASK)
+ return -EINVAL;
+
+ info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
+ if (info->master == NULL)
+ return -ENOMEM;
+ atomic_set(&info->master->count, info->u.nth.count);
+
+ return 0;
+}
+
+static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_statistic_info *info = par->matchinfo;
+
+ kfree(info->master);
+}
+
+static struct xt_match xt_statistic_mt_reg __read_mostly = {
+ .name = "statistic",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = statistic_mt,
+ .checkentry = statistic_mt_check,
+ .destroy = statistic_mt_destroy,
+ .matchsize = sizeof(struct xt_statistic_info),
+ .usersize = offsetof(struct xt_statistic_info, master),
+ .me = THIS_MODULE,
+};
+
+static int __init statistic_mt_init(void)
+{
+ return xt_register_match(&xt_statistic_mt_reg);
+}
+
+static void __exit statistic_mt_exit(void)
+{
+ xt_unregister_match(&xt_statistic_mt_reg);
+}
+
+module_init(statistic_mt_init);
+module_exit(statistic_mt_exit);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
new file mode 100644
index 000000000..8ce25bc9b
--- /dev/null
+++ b/net/netfilter/xt_string.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* String matching match for iptables
+ *
+ * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
+ */
+
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_string.h>
+#include <linux/textsearch.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
+MODULE_DESCRIPTION("Xtables: string-based matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_string");
+MODULE_ALIAS("ip6t_string");
+MODULE_ALIAS("ebt_string");
+
+static bool
+string_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_string_info *conf = par->matchinfo;
+ bool invert;
+
+ invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
+
+ return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
+ conf->to_offset, conf->config)
+ != UINT_MAX) ^ invert;
+}
+
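+/* Editor's note: skb_find_text() returns the offset of the first match
+ * or UINT_MAX when the pattern is absent, so the expression above is
+ * simply "found XOR inverted".  The textsearch config built in
+ * string_mt_check() below selects the algorithm by name ("bm", "kmp",
+ * ...) via textsearch_prepare().
+ */
+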
+#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
+
+static int string_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_string_info *conf = par->matchinfo;
+ struct ts_config *ts_conf;
+ int flags = TS_AUTOLOAD;
+
+ /* Damn, can't handle this case properly with iptables... */
+ if (conf->from_offset > conf->to_offset)
+ return -EINVAL;
+ if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
+ return -EINVAL;
+ if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
+ return -EINVAL;
+ if (conf->u.v1.flags &
+ ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
+ return -EINVAL;
+ if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
+ flags |= TS_IGNORECASE;
+ ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
+ GFP_KERNEL, flags);
+ if (IS_ERR(ts_conf))
+ return PTR_ERR(ts_conf);
+
+ conf->config = ts_conf;
+ return 0;
+}
+
+static void string_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
+}
+
+static struct xt_match xt_string_mt_reg __read_mostly = {
+ .name = "string",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = string_mt_check,
+ .match = string_mt,
+ .destroy = string_mt_destroy,
+ .matchsize = sizeof(struct xt_string_info),
+ .usersize = offsetof(struct xt_string_info, config),
+ .me = THIS_MODULE,
+};
+
+static int __init string_mt_init(void)
+{
+ return xt_register_match(&xt_string_mt_reg);
+}
+
+static void __exit string_mt_exit(void)
+{
+ xt_unregister_match(&xt_string_mt_reg);
+}
+
+module_init(string_mt_init);
+module_exit(string_mt_exit);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
new file mode 100644
index 000000000..37704ab01
--- /dev/null
+++ b/net/netfilter/xt_tcpmss.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel module to match TCP MSS values. */
+
+/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ * Portions (C) 2005 by Harald Welte <laforge@netfilter.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/xt_tcpmss.h>
+#include <linux/netfilter/x_tables.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: TCP MSS match");
+MODULE_ALIAS("ipt_tcpmss");
+MODULE_ALIAS("ip6t_tcpmss");
+
+static bool
+tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_tcpmss_match_info *info = par->matchinfo;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+	/* tcp.doff is only 4 bits, i.e. max 15 * 4 bytes */
+ const u_int8_t *op;
+ u8 _opt[15 * 4 - sizeof(_tcph)];
+ unsigned int i, optlen;
+
+ /* If we don't have the whole header, drop packet. */
+ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ goto dropit;
+
+ /* Malformed. */
+ if (th->doff*4 < sizeof(*th))
+ goto dropit;
+
+ optlen = th->doff*4 - sizeof(*th);
+ if (!optlen)
+ goto out;
+
+ /* Truncated options. */
+ op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt);
+ if (op == NULL)
+ goto dropit;
+
+ for (i = 0; i < optlen; ) {
+ if (op[i] == TCPOPT_MSS
+ && (optlen - i) >= TCPOLEN_MSS
+ && op[i+1] == TCPOLEN_MSS) {
+ u_int16_t mssval;
+
+ mssval = (op[i+2] << 8) | op[i+3];
+
+ return (mssval >= info->mss_min &&
+ mssval <= info->mss_max) ^ info->invert;
+ }
+ if (op[i] < 2)
+ i++;
+ else
+ i += op[i+1] ? : 1;
+ }
+out:
+ return info->invert;
+
+dropit:
+ par->hotdrop = true;
+ return false;
+}
+
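+/* Editor's note: a worked example of the option walk above.  Assume the
+ * TCP options start with the bytes { 0x01, 0x02, 0x04, 0x05, 0xb4 }:
+ *
+ *	op[0] = 0x01 (NOP, kind < 2)     -> i++
+ *	op[1] = 0x02 (TCPOPT_MSS), op[2] = 0x04 (TCPOLEN_MSS)
+ *	mssval = (0x05 << 8) | 0xb4 = 1460
+ *
+ * which is then range-checked against mss_min/mss_max and XORed with
+ * info->invert.
+ */
+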
+static struct xt_match tcpmss_mt_reg[] __read_mostly = {
+ {
+ .name = "tcpmss",
+ .family = NFPROTO_IPV4,
+ .match = tcpmss_mt,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcpmss",
+ .family = NFPROTO_IPV6,
+ .match = tcpmss_mt,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init tcpmss_mt_init(void)
+{
+ return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
+}
+
+static void __exit tcpmss_mt_exit(void)
+{
+ xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
+}
+
+module_init(tcpmss_mt_init);
+module_exit(tcpmss_mt_exit);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
new file mode 100644
index 000000000..11ec2abf0
--- /dev/null
+++ b/net/netfilter/xt_tcpudp.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xt_tcp");
+MODULE_ALIAS("xt_udp");
+MODULE_ALIAS("ipt_udp");
+MODULE_ALIAS("ipt_tcp");
+MODULE_ALIAS("ip6t_udp");
+MODULE_ALIAS("ip6t_tcp");
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline bool
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert)
+{
+ return (port >= min && port <= max) ^ invert;
+}
+
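+/* Editor's note: port_match() is the usual "range test XOR invert";
+ * e.g. with min = 80, max = 80, invert = true, every port except 80
+ * matches.
+ */
+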
+static bool
+tcp_find_option(u_int8_t option,
+ const struct sk_buff *skb,
+ unsigned int protoff,
+ unsigned int optlen,
+ bool invert,
+ bool *hotdrop)
+{
+	/* tcp.doff is only 4 bits, i.e. max 15 * 4 bytes */
+ const u_int8_t *op;
+ u_int8_t _opt[60 - sizeof(struct tcphdr)];
+ unsigned int i;
+
+ pr_debug("finding option\n");
+
+ if (!optlen)
+ return invert;
+
+ /* If we don't have the whole header, drop packet. */
+ op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr),
+ optlen, _opt);
+ if (op == NULL) {
+ *hotdrop = true;
+ return false;
+ }
+
+ for (i = 0; i < optlen; ) {
+		if (op[i] == option)
+			return !invert;
+		if (op[i] < 2)
+			i++;
+		else
+			i += op[i+1] ? : 1;
+ }
+
+ return invert;
+}
+
+static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ const struct xt_tcp *tcpinfo = par->matchinfo;
+
+ if (par->fragoff != 0) {
+ /* To quote Alan:
+
+ Don't allow a fragment of TCP 8 bytes in. Nobody normal
+	   causes this. It's a cracker trying to break in by doing a
+ flag overwrite to pass the direction checks.
+ */
+ if (par->fragoff == 1) {
+ pr_debug("Dropping evil TCP offset=1 frag.\n");
+ par->hotdrop = true;
+ }
+ /* Must not be a fragment. */
+ return false;
+ }
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ pr_debug("Dropping evil TCP offset=0 tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+ ntohs(th->source),
+ !!(tcpinfo->invflags & XT_TCP_INV_SRCPT)))
+ return false;
+ if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+ ntohs(th->dest),
+ !!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
+ return false;
+ if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS,
+ (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp))
+ return false;
+ if (tcpinfo->option) {
+ if (th->doff * 4 < sizeof(_tcph)) {
+ par->hotdrop = true;
+ return false;
+ }
+ if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
+ th->doff*4 - sizeof(_tcph),
+ tcpinfo->invflags & XT_TCP_INV_OPTION,
+ &par->hotdrop))
+ return false;
+ }
+ return true;
+}
+
+static int tcp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_tcp *tcpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
+}
+
+static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct udphdr *uh;
+ struct udphdr _udph;
+ const struct xt_udp *udpinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ pr_debug("Dropping evil UDP tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return port_match(udpinfo->spts[0], udpinfo->spts[1],
+ ntohs(uh->source),
+ !!(udpinfo->invflags & XT_UDP_INV_SRCPT))
+ && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+ ntohs(uh->dest),
+ !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
+}
+
+static int udp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_udp *udpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0;
+}
+
+static struct xt_match tcpudp_mt_reg[] __read_mostly = {
+ {
+ .name = "tcp",
+ .family = NFPROTO_IPV4,
+ .checkentry = tcp_mt_check,
+ .match = tcp_mt,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcp",
+ .family = NFPROTO_IPV6,
+ .checkentry = tcp_mt_check,
+ .match = tcp_mt,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = NFPROTO_IPV4,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = NFPROTO_IPV6,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udplite",
+ .family = NFPROTO_IPV4,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udplite",
+ .family = NFPROTO_IPV6,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init tcpudp_mt_init(void)
+{
+ return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
+}
+
+static void __exit tcpudp_mt_exit(void)
+{
+ xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
+}
+
+module_init(tcpudp_mt_init);
+module_exit(tcpudp_mt_exit);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
new file mode 100644
index 000000000..6aa12d0f5
--- /dev/null
+++ b/net/netfilter/xt_time.c
@@ -0,0 +1,300 @@
+/*
+ * xt_time
+ * Copyright © CC Computer Consultants GmbH, 2007
+ *
+ * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
+ * This is a module which is used for time matching
+ * It is using some modified code from dietlibc (localtime() function)
+ * that you can find at https://www.fefe.de/dietlibc/
+ * This file is distributed under the terms of the GNU General Public
+ * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_time.h>
+
+struct xtm {
+ u_int8_t month; /* (1-12) */
+ u_int8_t monthday; /* (1-31) */
+ u_int8_t weekday; /* (1-7) */
+ u_int8_t hour; /* (0-23) */
+ u_int8_t minute; /* (0-59) */
+ u_int8_t second; /* (0-59) */
+ unsigned int dse;
+};
+
+extern struct timezone sys_tz; /* ouch */
+
+static const u_int16_t days_since_year[] = {
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
+};
+
+static const u_int16_t days_since_leapyear[] = {
+ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335,
+};
+
+/*
+ * Since time progresses forward, it is best to organize this array in reverse,
+ * to minimize lookup time.
+ */
+enum {
+ DSE_FIRST = 2039,
+ SECONDS_PER_DAY = 86400,
+};
+static const u_int16_t days_since_epoch[] = {
+ /* 2039 - 2030 */
+ 25202, 24837, 24472, 24106, 23741, 23376, 23011, 22645, 22280, 21915,
+ /* 2029 - 2020 */
+ 21550, 21184, 20819, 20454, 20089, 19723, 19358, 18993, 18628, 18262,
+ /* 2019 - 2010 */
+ 17897, 17532, 17167, 16801, 16436, 16071, 15706, 15340, 14975, 14610,
+ /* 2009 - 2000 */
+ 14245, 13879, 13514, 13149, 12784, 12418, 12053, 11688, 11323, 10957,
+ /* 1999 - 1990 */
+ 10592, 10227, 9862, 9496, 9131, 8766, 8401, 8035, 7670, 7305,
+ /* 1989 - 1980 */
+ 6940, 6574, 6209, 5844, 5479, 5113, 4748, 4383, 4018, 3652,
+ /* 1979 - 1970 */
+ 3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
+};
+
+static inline bool is_leap(unsigned int y)
+{
+ return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
+}
+
+/*
+ * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
+ * Since we match against days and daytime, the SSTE value needs to be
+ * computed back into human-readable dates.
+ *
+ * This is done in three separate functions so that the most expensive
+ * calculations are done last, in case a "simple match" can be found earlier.
+ */
+static inline unsigned int localtime_1(struct xtm *r, time64_t time)
+{
+ unsigned int v, w;
+
+ /* Each day has 86400s, so finding the hour/minute is actually easy. */
+ div_u64_rem(time, SECONDS_PER_DAY, &v);
+ r->second = v % 60;
+ w = v / 60;
+ r->minute = w % 60;
+ r->hour = w / 60;
+ return v;
+}
+
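+/* Editor's note: a quick numeric check of localtime_1(), using the
+ * illustrative stamp 3661 (i.e. 1970-01-01 01:01:01 UTC):
+ *
+ *	v = 3661 % 86400 = 3661 -> second = 3661 % 60 = 1
+ *	w = 3661 / 60    = 61   -> minute = 61 % 60 = 1, hour = 61 / 60 = 1
+ */
+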
+static inline void localtime_2(struct xtm *r, time64_t time)
+{
+ /*
+ * Here comes the rest (weekday, monthday). First, divide the SSTE
+ * by seconds-per-day to get the number of _days_ since the epoch.
+ */
+ r->dse = div_u64(time, SECONDS_PER_DAY);
+
+ /*
+ * 1970-01-01 (w=0) was a Thursday (4).
+ * -1 and +1 map Sunday properly onto 7.
+ */
+ r->weekday = (4 + r->dse - 1) % 7 + 1;
+}
+
+static void localtime_3(struct xtm *r, time64_t time)
+{
+ unsigned int year, i, w = r->dse;
+
+ /*
+ * In each year, a certain number of days-since-the-epoch have passed.
+ * Find the year that is closest to said days.
+ *
+ * Consider, for example, w=21612 (2029-03-04). Loop will abort on
+ * dse[i] <= w, which happens when dse[i] == 21550. This implies
+ * year == 2009. w will then be 62.
+ */
+ for (i = 0, year = DSE_FIRST; days_since_epoch[i] > w;
+ ++i, --year)
+ /* just loop */;
+
+ w -= days_since_epoch[i];
+
+ /*
+ * By now we have the current year, and the day of the year.
+ * r->yearday = w;
+ *
+ * On to finding the month (like above). In each month, a certain
+ * number of days-since-New Year have passed, and find the closest
+ * one.
+ *
+ * Consider w=62 (in a non-leap year). Loop will abort on
+ * dsy[i] < w, which happens when dsy[i] == 31+28 (i == 2).
+ * Concludes i == 2, i.e. 3rd month => March.
+ *
+ * (A different approach to use would be to subtract a monthlength
+ * from w repeatedly while counting.)
+ */
+ if (is_leap(year)) {
+ /* use days_since_leapyear[] in a leap year */
+ for (i = ARRAY_SIZE(days_since_leapyear) - 1;
+ i > 0 && days_since_leapyear[i] > w; --i)
+ /* just loop */;
+ r->monthday = w - days_since_leapyear[i] + 1;
+ } else {
+ for (i = ARRAY_SIZE(days_since_year) - 1;
+ i > 0 && days_since_year[i] > w; --i)
+ /* just loop */;
+ r->monthday = w - days_since_year[i] + 1;
+ }
+
+ r->month = i + 1;
+}
+
+static bool
+time_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_time_info *info = par->matchinfo;
+ unsigned int packet_time;
+ struct xtm current_time;
+ time64_t stamp;
+
+ /*
+ * We need real time here, but we can neither use skb->tstamp
+ * nor __net_timestamp().
+ *
+ * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+ * use different clock types (real vs monotonic).
+ *
+ * Suppose you have two rules:
+ * 1. match before 13:00
+ * 2. match after 13:00
+ *
+ * If you match against processing time (ktime_get_real_seconds) it
+ * may happen that the same packet matches both rules if
+ * it arrived at the right moment before 13:00, so it would be
+ * better to check skb->tstamp and set it via __net_timestamp()
+ * if needed. This however breaks outgoing packets tx timestamp,
+ * and causes them to get delayed forever by fq packet scheduler.
+ */
+ stamp = ktime_get_real_seconds();
+
+ if (info->flags & XT_TIME_LOCAL_TZ)
+ /* Adjust for local timezone */
+ stamp -= 60 * sys_tz.tz_minuteswest;
+
+ /*
+ * xt_time will match when _all_ of the following hold:
+ * - 'now' is in the global time range date_start..date_end
+ * - 'now' is in the monthday mask
+ * - 'now' is in the weekday mask
+ * - 'now' is in the daytime range time_start..time_end
+ * (and by default, libxt_time will set these so as to match)
+ *
+ * note: info->date_start/stop are unsigned 32-bit values that
+ * can hold values beyond y2038, but not after y2106.
+ */
+
+ if (stamp < info->date_start || stamp > info->date_stop)
+ return false;
+
+ packet_time = localtime_1(&current_time, stamp);
+
+ if (info->daytime_start < info->daytime_stop) {
+ if (packet_time < info->daytime_start ||
+ packet_time > info->daytime_stop)
+ return false;
+ } else {
+ if (packet_time < info->daytime_start &&
+ packet_time > info->daytime_stop)
+ return false;
+
+		/* If the user asked to ignore 'next day', then e.g.
+		 * '1 PM Wed, August 1st' should be treated
+		 * like 'Tue 1 PM July 31st'.
+		 *
+		 * This also causes
+		 * 'Monday 23:00 to 01:00' to match for two hours,
+		 * from Monday 23:00 to Tuesday 01:00.
+		 */
+ if ((info->flags & XT_TIME_CONTIGUOUS) &&
+ packet_time <= info->daytime_stop)
+ stamp -= SECONDS_PER_DAY;
+ }
+
+ localtime_2(&current_time, stamp);
+
+ if (!(info->weekdays_match & (1 << current_time.weekday)))
+ return false;
+
+ /* Do not spend time computing monthday if all days match anyway */
+ if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) {
+ localtime_3(&current_time, stamp);
+ if (!(info->monthdays_match & (1 << current_time.monthday)))
+ return false;
+ }
+
+ return true;
+}
+
+static int time_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_time_info *info = par->matchinfo;
+
+ if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
+ info->daytime_stop > XT_TIME_MAX_DAYTIME) {
+ pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n");
+ return -EDOM;
+ }
+
+ if (info->flags & ~XT_TIME_ALL_FLAGS) {
+ pr_info_ratelimited("unknown flags 0x%x\n",
+ info->flags & ~XT_TIME_ALL_FLAGS);
+ return -EINVAL;
+ }
+
+ if ((info->flags & XT_TIME_CONTIGUOUS) &&
+ info->daytime_start < info->daytime_stop)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_match xt_time_mt_reg __read_mostly = {
+ .name = "time",
+ .family = NFPROTO_UNSPEC,
+ .match = time_mt,
+ .checkentry = time_mt_check,
+ .matchsize = sizeof(struct xt_time_info),
+ .me = THIS_MODULE,
+};
+
+static int __init time_mt_init(void)
+{
+ int minutes = sys_tz.tz_minuteswest;
+
+ if (minutes < 0) /* east of Greenwich */
+ pr_info("kernel timezone is +%02d%02d\n",
+ -minutes / 60, -minutes % 60);
+ else /* west of Greenwich */
+ pr_info("kernel timezone is -%02d%02d\n",
+ minutes / 60, minutes % 60);
+
+ return xt_register_match(&xt_time_mt_reg);
+}
+
+static void __exit time_mt_exit(void)
+{
+ xt_unregister_match(&xt_time_mt_reg);
+}
+
+module_init(time_mt_init);
+module_exit(time_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: time-based matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_time");
+MODULE_ALIAS("ip6t_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
new file mode 100644
index 000000000..117d4615d
--- /dev/null
+++ b/net/netfilter/xt_u32.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xt_u32 - kernel module to match u32 packet content
+ *
+ * Original author: Don Cohen <don@isis.cs3-inc.com>
+ * (C) CC Computer Consultants GmbH, 2007
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_u32.h>
+
+static bool u32_match_it(const struct xt_u32 *data,
+ const struct sk_buff *skb)
+{
+ const struct xt_u32_test *ct;
+ unsigned int testind;
+ unsigned int nnums;
+ unsigned int nvals;
+ unsigned int i;
+ __be32 n;
+ u_int32_t pos;
+ u_int32_t val;
+ u_int32_t at;
+
+ /*
+	 * Small example: "0 >> 28 = 4 && 8 & 0xFF0000 >> 16 = 6, 17"
+ * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands.
+ */
+ for (testind = 0; testind < data->ntests; ++testind) {
+ ct = &data->tests[testind];
+ at = 0;
+ pos = ct->location[0].number;
+
+ if (skb->len < 4 || pos > skb->len - 4)
+ return false;
+
+ if (skb_copy_bits(skb, pos, &n, sizeof(n)) < 0)
+ BUG();
+ val = ntohl(n);
+ nnums = ct->nnums;
+
+ /* Inner loop runs over "&", "<<", ">>" and "@" operands */
+ for (i = 1; i < nnums; ++i) {
+ u_int32_t number = ct->location[i].number;
+ switch (ct->location[i].nextop) {
+ case XT_U32_AND:
+ val &= number;
+ break;
+ case XT_U32_LEFTSH:
+ val <<= number;
+ break;
+ case XT_U32_RIGHTSH:
+ val >>= number;
+ break;
+ case XT_U32_AT:
+ if (at + val < at)
+ return false;
+ at += val;
+ pos = number;
+ if (at + 4 < at || skb->len < at + 4 ||
+ pos > skb->len - at - 4)
+ return false;
+
+ if (skb_copy_bits(skb, at + pos, &n,
+ sizeof(n)) < 0)
+ BUG();
+ val = ntohl(n);
+ break;
+ }
+ }
+
+ /* Run over the "," and ":" operands */
+ nvals = ct->nvalues;
+ for (i = 0; i < nvals; ++i)
+ if (ct->value[i].min <= val && val <= ct->value[i].max)
+ break;
+
+ if (i >= ct->nvalues)
+ return false;
+ }
+
+ return true;
+}
+
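+/* Editor's note: walking the example from the comment above,
+ * "0 >> 28 = 4 && 8 & 0xFF0000 >> 16 = 6, 17", against an IPv4+TCP
+ * packet:
+ *
+ *	test 0: val = ntohl(word at offset 0); val >>= 28
+ *	        -> the IP version nibble, accepted if it equals 4
+ *	test 1: val = ntohl(word at offset 8); val &= 0xFF0000; val >>= 16
+ *	        -> the protocol byte, accepted if it is 6 (TCP) or 17 (UDP)
+ */
+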
+static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ bool ret;
+
+ ret = u32_match_it(data, skb);
+ return ret ^ data->invert;
+}
+
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match xt_u32_mt_reg __read_mostly = {
+ .name = "u32",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = u32_mt,
+ .checkentry = u32_mt_checkentry,
+ .matchsize = sizeof(struct xt_u32),
+ .me = THIS_MODULE,
+};
+
+static int __init u32_mt_init(void)
+{
+ return xt_register_match(&xt_u32_mt_reg);
+}
+
+static void __exit u32_mt_exit(void)
+{
+ xt_unregister_match(&xt_u32_mt_reg);
+}
+
+module_init(u32_mt_init);
+module_exit(u32_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_u32");
+MODULE_ALIAS("ip6t_u32");