Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ipvlan
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
6 files changed, 2545 insertions, 0 deletions
diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile
new file mode 100644
index 0000000000..2020e9dedc
--- /dev/null
+++ b/drivers/net/ipvlan/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Ethernet Ipvlan driver
+#
+
+obj-$(CONFIG_IPVLAN) += ipvlan.o
+obj-$(CONFIG_IPVTAP) += ipvtap.o
+
+ipvlan-objs-$(CONFIG_IPVLAN_L3S) += ipvlan_l3s.o
+ipvlan-objs := ipvlan_core.o ipvlan_main.o $(ipvlan-objs-y)
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
new file mode 100644
index 0000000000..025e0c19ec
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+ */
+#ifndef __IPVLAN_H
+#define __IPVLAN_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rculist.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_link.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/addrconf.h>
+#include <net/l3mdev.h>
+
+#define IPVLAN_DRV	"ipvlan"
+#define IPV_DRV_VER	"0.1"
+
+#define IPVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
+#define IPVLAN_HASH_MASK	(IPVLAN_HASH_SIZE - 1)
+
+#define IPVLAN_MAC_FILTER_BITS	8
+#define IPVLAN_MAC_FILTER_SIZE	(1 << IPVLAN_MAC_FILTER_BITS)
+#define IPVLAN_MAC_FILTER_MASK	(IPVLAN_MAC_FILTER_SIZE - 1)
+
+#define IPVLAN_QBACKLOG_LIMIT	1000
+
+typedef enum {
+	IPVL_IPV6 = 0,
+	IPVL_ICMPV6,
+	IPVL_IPV4,
+	IPVL_ARP,
+} ipvl_hdr_type;
+
+struct ipvl_pcpu_stats {
+	u64_stats_t		rx_pkts;
+	u64_stats_t		rx_bytes;
+	u64_stats_t		rx_mcast;
+	u64_stats_t		tx_pkts;
+	u64_stats_t		tx_bytes;
+	struct u64_stats_sync	syncp;
+	u32			rx_errs;
+	u32			tx_drps;
+};
+
+struct ipvl_port;
+
+struct ipvl_dev {
+	struct net_device	*dev;
+	struct list_head	pnode;
+	struct ipvl_port	*port;
+	struct net_device	*phy_dev;
+	struct list_head	addrs;
+	struct ipvl_pcpu_stats	__percpu *pcpu_stats;
+	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
+	netdev_features_t	sfeatures;
+	u32			msg_enable;
+	spinlock_t		addrs_lock;
+};
+
+struct ipvl_addr {
+	struct ipvl_dev		*master; /* Back pointer to master */
+	union {
+		struct in6_addr	ip6;	 /* IPv6 address on logical interface */
+		struct in_addr	ip4;	 /* IPv4 address on logical interface */
+	} ipu;
+#define ip6addr	ipu.ip6
+#define ip4addr ipu.ip4
+	struct hlist_node	hlnode;  /* Hash-table linkage */
+	struct list_head	anode;   /* logical-interface linkage */
+	ipvl_hdr_type		atype;
+	struct rcu_head		rcu;
+};
+
+struct ipvl_port {
+	struct net_device	*dev;
+	possible_net_t		pnet;
+	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
+	struct list_head	ipvlans;
+	u16			mode;
+	u16			flags;
+	u16			dev_id_start;
+	struct work_struct	wq;
+	struct sk_buff_head	backlog;
+	int			count;
+	struct ida		ida;
+	netdevice_tracker	dev_tracker;
+};
+
+struct ipvl_skb_cb {
+	bool tx_pkt;
+};
+#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
+
+static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
+{
+	return rcu_dereference(d->rx_handler_data);
+}
+
+static inline struct ipvl_port *ipvlan_port_get_rcu_bh(const struct net_device *d)
+{
+	return rcu_dereference_bh(d->rx_handler_data);
+}
+
+static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d)
+{
+	return rtnl_dereference(d->rx_handler_data);
+}
+
+static inline bool ipvlan_is_private(const struct ipvl_port *port)
+{
+	return !!(port->flags & IPVLAN_F_PRIVATE);
+}
+
+static inline void ipvlan_mark_private(struct ipvl_port *port)
+{
+	port->flags |= IPVLAN_F_PRIVATE;
+}
+
+static inline void ipvlan_clear_private(struct ipvl_port *port)
+{
+	port->flags &= ~IPVLAN_F_PRIVATE;
+}
+
+static inline bool ipvlan_is_vepa(const struct ipvl_port *port)
+{
+	return !!(port->flags & IPVLAN_F_VEPA);
+}
+
+static inline void ipvlan_mark_vepa(struct ipvl_port *port)
+{
+	port->flags |= IPVLAN_F_VEPA;
+}
+
+static inline void ipvlan_clear_vepa(struct ipvl_port *port)
+{
+	port->flags &= ~IPVLAN_F_VEPA;
+}
+
+void ipvlan_init_secret(void);
+unsigned int ipvlan_mac_hash(const unsigned char *addr);
+rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
+void ipvlan_process_multicast(struct work_struct *work);
+int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
+void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
+struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
+				   const void *iaddr, bool is_v6);
+bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
+void ipvlan_ht_addr_del(struct ipvl_addr *addr);
+struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
+				     int addr_type, bool use_dest);
+void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type);
+void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
+		     unsigned int len, bool success, bool mcast);
+int ipvlan_link_new(struct net *src_net, struct net_device *dev,
+		    struct nlattr *tb[], struct nlattr *data[],
+		    struct netlink_ext_ack *extack);
+void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
+void ipvlan_link_setup(struct net_device *dev);
+int ipvlan_link_register(struct rtnl_link_ops *ops);
+#ifdef CONFIG_IPVLAN_L3S
+int ipvlan_l3s_register(struct ipvl_port *port);
+void ipvlan_l3s_unregister(struct ipvl_port *port);
+void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet);
+int ipvlan_l3s_init(void);
+void ipvlan_l3s_cleanup(void);
+#else
+static inline int ipvlan_l3s_register(struct ipvl_port *port)
+{
+	return -ENOTSUPP;
+}
+
+static inline void ipvlan_l3s_unregister(struct ipvl_port *port)
+{
+}
+
+static inline void ipvlan_migrate_l3s_hook(struct net *oldnet,
+					   struct net *newnet)
+{
+}
+
+static inline int ipvlan_l3s_init(void)
+{
+	return 0;
+}
+
+static inline void ipvlan_l3s_cleanup(void)
+{
+}
+#endif /* CONFIG_IPVLAN_L3S */
+
+static inline bool netif_is_ipvlan_port(const struct net_device *dev)
+{
+	return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame;
+}
+
+#endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
new file mode 100644
index 0000000000..2d5b021b4e
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -0,0 +1,772 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+ */
+
+#include "ipvlan.h"
+
+static u32 ipvlan_jhash_secret __read_mostly;
+
+void ipvlan_init_secret(void)
+{
+	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
+}
+
+void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
+			    unsigned int len, bool success, bool mcast)
+{
+	if (likely(success)) {
+		struct ipvl_pcpu_stats *pcptr;
+
+		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
+		u64_stats_update_begin(&pcptr->syncp);
+		u64_stats_inc(&pcptr->rx_pkts);
+		u64_stats_add(&pcptr->rx_bytes, len);
+		if (mcast)
+			u64_stats_inc(&pcptr->rx_mcast);
+		u64_stats_update_end(&pcptr->syncp);
+	} else {
+		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
+	}
+}
+EXPORT_SYMBOL_GPL(ipvlan_count_rx);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+	const struct in6_addr *ip6_addr = iaddr;
+
+	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
+	       IPVLAN_HASH_MASK;
+}
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+	return 0;
+}
+#endif
+
+static u8 ipvlan_get_v4_hash(const void *iaddr)
+{
+	const struct in_addr *ip4_addr = iaddr;
+
+	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
+	       IPVLAN_HASH_MASK;
+}
+
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+	if (!is_v6 && addr->atype == IPVL_IPV4) {
+		struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+		return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (is_v6 && addr->atype == IPVL_IPV6) {
+		struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+		return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+	}
+
+	return false;
+}
+
+static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
+					       const void *iaddr, bool is_v6)
+{
+	struct ipvl_addr *addr;
+	u8 hash;
+
+	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
+	       ipvlan_get_v4_hash(iaddr);
+	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+		if (addr_equal(is_v6, addr, iaddr))
+			return addr;
+	return NULL;
+}
+
+void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
+{
+	struct ipvl_port *port = ipvlan->port;
+	u8 hash;
+
+	hash = (addr->atype == IPVL_IPV6) ?
+	       ipvlan_get_v6_hash(&addr->ip6addr) :
+	       ipvlan_get_v4_hash(&addr->ip4addr);
+	if (hlist_unhashed(&addr->hlnode))
+		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
+}
+
+void ipvlan_ht_addr_del(struct ipvl_addr *addr)
+{
+	hlist_del_init_rcu(&addr->hlnode);
+}
+
+struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
+				   const void *iaddr, bool is_v6)
+{
+	struct ipvl_addr *addr, *ret = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+		if (addr_equal(is_v6, addr, iaddr)) {
+			ret = addr;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
+{
+	struct ipvl_dev *ipvlan;
+	bool ret = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+		if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+			ret = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
+{
+	void *lyr3h = NULL;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_ARP): {
+		struct arphdr *arph;
+
+		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
+			return NULL;
+
+		arph = arp_hdr(skb);
+		*type = IPVL_ARP;
+		lyr3h = arph;
+		break;
+	}
+	case htons(ETH_P_IP): {
+		u32 pktlen;
+		struct iphdr *ip4h;
+
+		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
+			return NULL;
+
+		ip4h = ip_hdr(skb);
+		pktlen = skb_ip_totlen(skb);
+		if (ip4h->ihl < 5 || ip4h->version != 4)
+			return NULL;
+		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
+			return NULL;
+
+		*type = IPVL_IPV4;
+		lyr3h = ip4h;
+		break;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6): {
+		struct ipv6hdr *ip6h;
+
+		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
+			return NULL;
+
+		ip6h = ipv6_hdr(skb);
+		if (ip6h->version != 6)
+			return NULL;
+
+		*type = IPVL_IPV6;
+		lyr3h = ip6h;
+		/* Only Neighbour Solicitation pkts need different treatment */
+		if (ipv6_addr_any(&ip6h->saddr) &&
+		    ip6h->nexthdr == NEXTHDR_ICMP) {
+			struct icmp6hdr	*icmph;
+
+			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
+				return NULL;
+
+			ip6h = ipv6_hdr(skb);
+			icmph = (struct icmp6hdr *)(ip6h + 1);
+
+			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
+				/* Need to access the ipv6 address in body */
+				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
+						+ sizeof(struct in6_addr))))
+					return NULL;
+
+				ip6h = ipv6_hdr(skb);
+				icmph = (struct icmp6hdr *)(ip6h + 1);
+			}
+
+			*type = IPVL_ICMPV6;
+			lyr3h = icmph;
+		}
+		break;
+	}
+#endif
+	default:
+		return NULL;
+	}
+
+	return lyr3h;
+}
+
+unsigned int ipvlan_mac_hash(const unsigned char *addr)
+{
+	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
+			       ipvlan_jhash_secret);
+
+	return hash & IPVLAN_MAC_FILTER_MASK;
+}
+
+void ipvlan_process_multicast(struct work_struct *work)
+{
+	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
+	struct ethhdr *ethh;
+	struct ipvl_dev *ipvlan;
+	struct sk_buff *skb, *nskb;
+	struct sk_buff_head list;
+	unsigned int len;
+	unsigned int mac_hash;
+	int ret;
+	u8 pkt_type;
+	bool tx_pkt;
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_bh(&port->backlog.lock);
+	skb_queue_splice_tail_init(&port->backlog, &list);
+	spin_unlock_bh(&port->backlog.lock);
+
+	while ((skb = __skb_dequeue(&list)) != NULL) {
+		struct net_device *dev = skb->dev;
+		bool consumed = false;
+
+		ethh = eth_hdr(skb);
+		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
+		mac_hash = ipvlan_mac_hash(ethh->h_dest);
+
+		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
+			pkt_type = PACKET_BROADCAST;
+		else
+			pkt_type = PACKET_MULTICAST;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+			if (tx_pkt && (ipvlan->dev == skb->dev))
+				continue;
+			if (!test_bit(mac_hash, ipvlan->mac_filters))
+				continue;
+			if (!(ipvlan->dev->flags & IFF_UP))
+				continue;
+			ret = NET_RX_DROP;
+			len = skb->len + ETH_HLEN;
+			nskb = skb_clone(skb, GFP_ATOMIC);
+			local_bh_disable();
+			if (nskb) {
+				consumed = true;
+				nskb->pkt_type = pkt_type;
+				nskb->dev = ipvlan->dev;
+				if (tx_pkt)
+					ret = dev_forward_skb(ipvlan->dev, nskb);
+				else
+					ret = netif_rx(nskb);
+			}
+			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+			local_bh_enable();
+		}
+		rcu_read_unlock();
+
+		if (tx_pkt) {
+			/* If the packet originated here, send it out. */
+			skb->dev = port->dev;
+			skb->pkt_type = pkt_type;
+			dev_queue_xmit(skb);
+		} else {
+			if (consumed)
+				consume_skb(skb);
+			else
+				kfree_skb(skb);
+		}
+		dev_put(dev);
+		cond_resched();
+	}
+}
+
+static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
+{
+	bool xnet = true;
+
+	if (dev)
+		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
+
+	skb_scrub_packet(skb, xnet);
+	if (dev)
+		skb->dev = dev;
+}
+
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
+			    bool local)
+{
+	struct ipvl_dev *ipvlan = addr->master;
+	struct net_device *dev = ipvlan->dev;
+	unsigned int len;
+	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
+	bool success = false;
+	struct sk_buff *skb = *pskb;
+
+	len = skb->len + ETH_HLEN;
+	/* Only packets exchanged between two local slaves need to have
+	 * device-up check as well as skb-share check.
+	 */
+	if (local) {
+		if (unlikely(!(dev->flags & IFF_UP))) {
+			kfree_skb(skb);
+			goto out;
+		}
+
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			goto out;
+
+		*pskb = skb;
+	}
+
+	if (local) {
+		skb->pkt_type = PACKET_HOST;
+		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
+			success = true;
+	} else {
+		skb->dev = dev;
+		ret = RX_HANDLER_ANOTHER;
+		success = true;
+	}
+
+out:
+	ipvlan_count_rx(ipvlan, len, success, false);
+	return ret;
+}
+
+struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
+				     int addr_type, bool use_dest)
+{
+	struct ipvl_addr *addr = NULL;
+
+	switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPVL_IPV6: {
+		struct ipv6hdr *ip6h;
+		struct in6_addr *i6addr;
+
+		ip6h = (struct ipv6hdr *)lyr3h;
+		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
+		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
+		break;
+	}
+	case IPVL_ICMPV6: {
+		struct nd_msg *ndmh;
+		struct in6_addr *i6addr;
+
+		/* Make sure that the NeighborSolicitation ICMPv6 packets
+		 * are handled to avoid DAD issue.
+		 */
+		ndmh = (struct nd_msg *)lyr3h;
+		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
+			i6addr = &ndmh->target;
+			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
+		}
+		break;
+	}
+#endif
+	case IPVL_IPV4: {
+		struct iphdr *ip4h;
+		__be32 *i4addr;
+
+		ip4h = (struct iphdr *)lyr3h;
+		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
+		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
+		break;
+	}
+	case IPVL_ARP: {
+		struct arphdr *arph;
+		unsigned char *arp_ptr;
+		__be32 dip;
+
+		arph = (struct arphdr *)lyr3h;
+		arp_ptr = (unsigned char *)(arph + 1);
+		if (use_dest)
+			arp_ptr += (2 * port->dev->addr_len) + 4;
+		else
+			arp_ptr += port->dev->addr_len;
+
+		memcpy(&dip, arp_ptr, 4);
+		addr = ipvlan_ht_addr_lookup(port, &dip, false);
+		break;
+	}
+	}
+
+	return addr;
+}
+
+static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
+{
+	const struct iphdr *ip4h = ip_hdr(skb);
+	struct net_device *dev = skb->dev;
+	struct net *net = dev_net(dev);
+	struct rtable *rt;
+	int err, ret = NET_XMIT_DROP;
+	struct flowi4 fl4 = {
+		.flowi4_oif = dev->ifindex,
+		.flowi4_tos = RT_TOS(ip4h->tos),
+		.flowi4_flags = FLOWI_FLAG_ANYSRC,
+		.flowi4_mark = skb->mark,
+		.daddr = ip4h->daddr,
+		.saddr = ip4h->saddr,
+	};
+
+	rt = ip_route_output_flow(net, &fl4, NULL);
+	if (IS_ERR(rt))
+		goto err;
+
+	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+		ip_rt_put(rt);
+		goto err;
+	}
+	skb_dst_set(skb, &rt->dst);
+
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+	err = ip_local_out(net, skb->sk, skb);
+	if (unlikely(net_xmit_eval(err)))
+		DEV_STATS_INC(dev, tx_errors);
+	else
+		ret = NET_XMIT_SUCCESS;
+	goto out;
+err:
+	DEV_STATS_INC(dev, tx_errors);
+	kfree_skb(skb);
+out:
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static noinline_for_stack int
+ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct flowi6 fl6 = {
+		.flowi6_oif = dev->ifindex,
+		.daddr = ip6h->daddr,
+		.saddr = ip6h->saddr,
+		.flowi6_flags = FLOWI_FLAG_ANYSRC,
+		.flowlabel = ip6_flowinfo(ip6h),
+		.flowi6_mark = skb->mark,
+		.flowi6_proto = ip6h->nexthdr,
+	};
+	struct dst_entry *dst;
+	int err;
+
+	dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+	err = dst->error;
+	if (err) {
+		dst_release(dst);
+		return err;
+	}
+	skb_dst_set(skb, dst);
+	return 0;
+}
+
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	int err, ret = NET_XMIT_DROP;
+
+	err = ipvlan_route_v6_outbound(dev, skb);
+	if (unlikely(err)) {
+		DEV_STATS_INC(dev, tx_errors);
+		kfree_skb(skb);
+		return err;
+	}
+
+	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+	err = ip6_local_out(dev_net(dev), skb->sk, skb);
+	if (unlikely(net_xmit_eval(err)))
+		DEV_STATS_INC(dev, tx_errors);
+	else
+		ret = NET_XMIT_SUCCESS;
+	return ret;
+}
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+	return NET_XMIT_DROP;
+}
+#endif
+
+static int ipvlan_process_outbound(struct sk_buff *skb)
+{
+	int ret = NET_XMIT_DROP;
+
+	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
+	 * will have L2; which need to discarded and processed further
+	 * in the net-ns of the main-device.
+	 */
+	if (skb_mac_header_was_set(skb)) {
+		/* In this mode we dont care about
+		 * multicast and broadcast traffic */
+		struct ethhdr *ethh = eth_hdr(skb);
+
+		if (is_multicast_ether_addr(ethh->h_dest)) {
+			pr_debug_ratelimited(
+				"Dropped {multi|broad}cast of type=[%x]\n",
+				ntohs(skb->protocol));
+			kfree_skb(skb);
+			goto out;
+		}
+
+		skb_pull(skb, sizeof(*ethh));
+		skb->mac_header = (typeof(skb->mac_header))~0U;
+		skb_reset_network_header(skb);
+	}
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		ret = ipvlan_process_v6_outbound(skb);
+	else if (skb->protocol == htons(ETH_P_IP))
+		ret = ipvlan_process_v4_outbound(skb);
+	else {
+		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
+				    ntohs(skb->protocol));
+		kfree_skb(skb);
+	}
+out:
+	return ret;
+}
+
+static void ipvlan_multicast_enqueue(struct ipvl_port *port,
+				     struct sk_buff *skb, bool tx_pkt)
+{
+	if (skb->protocol == htons(ETH_P_PAUSE)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Record that the deferred packet is from TX or RX path. By
+	 * looking at mac-addresses on packet will lead to erronus decisions.
+	 * (This would be true for a loopback-mode on master device or a
+	 * hair-pin mode of the switch.)
+	 */
+	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
+
+	spin_lock(&port->backlog.lock);
+	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+		dev_hold(skb->dev);
+		__skb_queue_tail(&port->backlog, skb);
+		spin_unlock(&port->backlog.lock);
+		schedule_work(&port->wq);
+	} else {
+		spin_unlock(&port->backlog.lock);
+		dev_core_stats_rx_dropped_inc(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
+static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+	void *lyr3h;
+	struct ipvl_addr *addr;
+	int addr_type;
+
+	lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	if (!ipvlan_is_vepa(ipvlan->port)) {
+		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
+		if (addr) {
+			if (ipvlan_is_private(ipvlan->port)) {
+				consume_skb(skb);
+				return NET_XMIT_DROP;
+			}
+			ipvlan_rcv_frame(addr, &skb, true);
+			return NET_XMIT_SUCCESS;
+		}
+	}
+out:
+	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
+	return ipvlan_process_outbound(skb);
+}
+
+static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ethhdr *eth = skb_eth_hdr(skb);
+	struct ipvl_addr *addr;
+	void *lyr3h;
+	int addr_type;
+
+	if (!ipvlan_is_vepa(ipvlan->port) &&
+	    ether_addr_equal(eth->h_dest, eth->h_source)) {
+		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
+		if (lyr3h) {
+			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
+			if (addr) {
+				if (ipvlan_is_private(ipvlan->port)) {
+					consume_skb(skb);
+					return NET_XMIT_DROP;
+				}
+				ipvlan_rcv_frame(addr, &skb, true);
+				return NET_XMIT_SUCCESS;
+			}
+		}
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NET_XMIT_DROP;
+
+		/* Packet definitely does not belong to any of the
+		 * virtual devices, but the dest is local. So forward
+		 * the skb for the main-dev. At the RX side we just return
+		 * RX_PASS for it to be processed further on the stack.
+		 */
+		dev_forward_skb(ipvlan->phy_dev, skb);
+		return NET_XMIT_SUCCESS;
+
+	} else if (is_multicast_ether_addr(eth->h_dest)) {
+		skb_reset_mac_header(skb);
+		ipvlan_skb_crossing_ns(skb, NULL);
+		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
+		return NET_XMIT_SUCCESS;
+	}
+
+	skb->dev = ipvlan->phy_dev;
+	return dev_queue_xmit(skb);
+}
+
+int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);
+
+	if (!port)
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
+		goto out;
+
+	switch(port->mode) {
+	case IPVLAN_MODE_L2:
+		return ipvlan_xmit_mode_l2(skb, dev);
+	case IPVLAN_MODE_L3:
+#ifdef CONFIG_IPVLAN_L3S
+	case IPVLAN_MODE_L3S:
+#endif
+		return ipvlan_xmit_mode_l3(skb, dev);
+	}
+
+	/* Should not reach here */
+	WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode);
+out:
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
+{
+	struct ethhdr *eth = eth_hdr(skb);
+	struct ipvl_addr *addr;
+	void *lyr3h;
+	int addr_type;
+
+	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
+		lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+		if (!lyr3h)
+			return true;
+
+		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
+		if (addr)
+			return false;
+	}
+
+	return true;
+}
+
+static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
+						 struct ipvl_port *port)
+{
+	void *lyr3h;
+	int addr_type;
+	struct ipvl_addr *addr;
+	struct sk_buff *skb = *pskb;
+	rx_handler_result_t ret = RX_HANDLER_PASS;
+
+	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+	if (addr)
+		ret = ipvlan_rcv_frame(addr, pskb, false);
+
+out:
+	return ret;
+}
+
+static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
+						 struct ipvl_port *port)
+{
+	struct sk_buff *skb = *pskb;
+	struct ethhdr *eth = eth_hdr(skb);
+	rx_handler_result_t ret = RX_HANDLER_PASS;
+
+	if (is_multicast_ether_addr(eth->h_dest)) {
+		if (ipvlan_external_frame(skb, port)) {
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+			/* External frames are queued for device local
+			 * distribution, but a copy is given to master
+			 * straight away to avoid sending duplicates later
+			 * when work-queue processes this frame. This is
+			 * achieved by returning RX_HANDLER_PASS.
+			 */
+			if (nskb) {
+				ipvlan_skb_crossing_ns(nskb, NULL);
+				ipvlan_multicast_enqueue(port, nskb, false);
+			}
+		}
+	} else {
+		/* Perform like l3 mode for non-multicast packet */
+		ret = ipvlan_handle_mode_l3(pskb, port);
+	}
+
+	return ret;
+}
+
+rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);
+
+	if (!port)
+		return RX_HANDLER_PASS;
+
+	switch (port->mode) {
+	case IPVLAN_MODE_L2:
+		return ipvlan_handle_mode_l2(pskb, port);
+	case IPVLAN_MODE_L3:
+		return ipvlan_handle_mode_l3(pskb, port);
+#ifdef CONFIG_IPVLAN_L3S
+	case IPVLAN_MODE_L3S:
+		return RX_HANDLER_PASS;
+#endif
+	}
+
+	/* Should not reach here */
+	WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode);
+	kfree_skb(skb);
+	return RX_HANDLER_CONSUMED;
+}
diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
new file mode 100644
index 0000000000..d5b05e8032
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_l3s.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+ */
+
+#include "ipvlan.h"
+
+static unsigned int ipvlan_netid __read_mostly;
+
+struct ipvlan_netns {
+	unsigned int ipvl_nf_hook_refcnt;
+};
+
+static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
+					    struct net_device *dev)
+{
+	struct ipvl_addr *addr = NULL;
+	struct ipvl_port *port;
+	int addr_type;
+	void *lyr3h;
+
+	if (!dev || !netif_is_ipvlan_port(dev))
+		goto out;
+
+	port = ipvlan_port_get_rcu(dev);
+	if (!port || port->mode != IPVLAN_MODE_L3S)
+		goto out;
+
+	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+out:
+	return addr;
+}
+
+static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev,
+				     struct sk_buff *skb, u16 proto)
+{
+	struct ipvl_addr *addr;
+	struct net_device *sdev;
+
+	addr = ipvlan_skb_to_addr(skb, dev);
+	if (!addr)
+		goto out;
+
+	sdev = addr->master->dev;
+	switch (proto) {
+	case AF_INET:
+	{
+		struct iphdr *ip4h = ip_hdr(skb);
+		int err;
+
+		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
+					   ip4h->tos, sdev);
+		if (unlikely(err))
+			goto out;
+		break;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+	{
+		struct dst_entry *dst;
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		int flags = RT6_LOOKUP_F_HAS_SADDR;
+		struct flowi6 fl6 = {
+			.flowi6_iif   = sdev->ifindex,
+			.daddr        = ip6h->daddr,
+			.saddr        = ip6h->saddr,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_mark  = skb->mark,
+			.flowi6_proto = ip6h->nexthdr,
+		};
+
+		skb_dst_drop(skb);
+		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+					     skb, flags);
+		skb_dst_set(skb, dst);
+		break;
+	}
+#endif
+	default:
+		break;
+	}
+out:
+	return skb;
+}
+
+static const struct l3mdev_ops ipvl_l3mdev_ops = {
+	.l3mdev_l3_rcv = ipvlan_l3_rcv,
+};
+
+static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+				    const struct nf_hook_state *state)
+{
+	struct ipvl_addr *addr;
+	unsigned int len;
+
+	addr = ipvlan_skb_to_addr(skb, skb->dev);
+	if (!addr)
+		goto out;
+
+	skb->dev = addr->master->dev;
+	skb->skb_iif = skb->dev->ifindex;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (addr->atype == IPVL_IPV6)
+		IP6CB(skb)->iif = skb->dev->ifindex;
+#endif
+	len = skb->len + ETH_HLEN;
+	ipvlan_count_rx(addr->master, len, true, false);
+out:
+	return NF_ACCEPT;
+}
+
+static const struct nf_hook_ops ipvl_nfops[] = {
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV4,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+#if IS_ENABLED(CONFIG_IPV6)
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV6,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+#endif
+};
+
+static int ipvlan_register_nf_hook(struct net *net)
+{
+	struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+	int err = 0;
+
+	if (!vnet->ipvl_nf_hook_refcnt) {
+		err = nf_register_net_hooks(net, ipvl_nfops,
+					    ARRAY_SIZE(ipvl_nfops));
+		if (!err)
+			vnet->ipvl_nf_hook_refcnt = 1;
+	} else {
+		vnet->ipvl_nf_hook_refcnt++;
+	}
+
+	return err;
+}
+
+static void ipvlan_unregister_nf_hook(struct net *net)
+{
+	struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+
+	if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
+		return;
+
+	vnet->ipvl_nf_hook_refcnt--;
+	if (!vnet->ipvl_nf_hook_refcnt)
+		nf_unregister_net_hooks(net, ipvl_nfops,
+					ARRAY_SIZE(ipvl_nfops));
+}
+
+void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet)
+{
+	struct ipvlan_netns *old_vnet;
+
+	ASSERT_RTNL();
+
+	old_vnet = net_generic(oldnet, ipvlan_netid);
+	if (!old_vnet->ipvl_nf_hook_refcnt)
+		return;
+
+	ipvlan_register_nf_hook(newnet);
+	ipvlan_unregister_nf_hook(oldnet);
+}
+
+static void ipvlan_ns_exit(struct net *net)
+{
+	struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+
+	if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
+		vnet->ipvl_nf_hook_refcnt = 0;
+		nf_unregister_net_hooks(net, ipvl_nfops,
+					ARRAY_SIZE(ipvl_nfops));
+	}
+}
+
+static struct pernet_operations ipvlan_net_ops = {
+	.id   = &ipvlan_netid,
+	.size = sizeof(struct ipvlan_netns),
+	.exit = ipvlan_ns_exit,
+};
+
+int ipvlan_l3s_init(void)
+{
+	return register_pernet_subsys(&ipvlan_net_ops);
+}
+
+void ipvlan_l3s_cleanup(void)
+{
+	unregister_pernet_subsys(&ipvlan_net_ops);
+}
+
+int ipvlan_l3s_register(struct ipvl_port *port)
+{
+	struct net_device *dev = port->dev;
+	int ret;
+
+	ASSERT_RTNL();
+
+	ret = ipvlan_register_nf_hook(read_pnet(&port->pnet));
+	if (!ret) {
+		dev->l3mdev_ops = &ipvl_l3mdev_ops;
+		dev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
+	}
+
+	return ret;
+}
+
+void ipvlan_l3s_unregister(struct ipvl_port *port)
+{
+	struct net_device *dev = port->dev;
+
+	ASSERT_RTNL();
+
+	dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+	ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
+	dev->l3mdev_ops = NULL;
+}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
new file mode 100644
index 0000000000..57c79f5f29
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -0,0 +1,1084 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+ */
+
+#include <linux/ethtool.h>
+
+#include "ipvlan.h"
+
+static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
+				struct netlink_ext_ack *extack)
+{
+	struct ipvl_dev *ipvlan;
+	unsigned int flags;
+	int err;
+
+	ASSERT_RTNL();
+	if (port->mode != nval) {
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+			flags = ipvlan->dev->flags;
+			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
+				err = dev_change_flags(ipvlan->dev,
+						       flags | IFF_NOARP,
+						       extack);
+			} else {
+				err = dev_change_flags(ipvlan->dev,
+						       flags & ~IFF_NOARP,
+						       extack);
+			}
+			if (unlikely(err))
+				goto fail;
+		}
+		if (nval == IPVLAN_MODE_L3S) {
+			/* New mode is L3S */
+			err = ipvlan_l3s_register(port);
+			if (err)
+				goto fail;
+		} else if (port->mode == IPVLAN_MODE_L3S) {
+			/* Old mode was L3S */
+			ipvlan_l3s_unregister(port);
+		}
+		port->mode = nval;
+	}
+	return 0;
+
+fail:
+	/* Undo the flags changes that have been done so far. */
+	list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
+		flags = ipvlan->dev->flags;
+		if (port->mode == IPVLAN_MODE_L3 ||
+		    port->mode == IPVLAN_MODE_L3S)
+			dev_change_flags(ipvlan->dev, flags | IFF_NOARP,
+					 NULL);
+		else
+			dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP,
+					 NULL);
+	}
+
+	return err;
+}
+
+static int ipvlan_port_create(struct net_device *dev)
+{
+	struct ipvl_port *port;
+	int err, idx;
+
+	port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	write_pnet(&port->pnet, dev_net(dev));
+	port->dev = dev;
+	port->mode = IPVLAN_MODE_L3;
+	INIT_LIST_HEAD(&port->ipvlans);
+	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
+		INIT_HLIST_HEAD(&port->hlhead[idx]);
+
+	skb_queue_head_init(&port->backlog);
+	INIT_WORK(&port->wq, ipvlan_process_multicast);
+	ida_init(&port->ida);
+	port->dev_id_start = 1;
+
+	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
+	if (err)
+		goto err;
+
+	netdev_hold(dev, &port->dev_tracker, GFP_KERNEL);
+	return 0;
+
+err:
+	kfree(port);
+	return err;
+}
+
+static void ipvlan_port_destroy(struct net_device *dev)
+{
+	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
+	struct sk_buff *skb;
+
+	netdev_put(dev, &port->dev_tracker);
+	if (port->mode == IPVLAN_MODE_L3S)
+		ipvlan_l3s_unregister(port);
+	netdev_rx_handler_unregister(dev);
+	cancel_work_sync(&port->wq);
+	while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+	ida_destroy(&port->ida);
+	kfree(port);
+}
+
+#define IPVLAN_ALWAYS_ON_OFLOADS \
+	(NETIF_F_SG | NETIF_F_HW_CSUM | \
+	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL)
+
+#define IPVLAN_ALWAYS_ON \
+	(IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED)
+
+#define IPVLAN_FEATURES \
+	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
+	 NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \
+	 NETIF_F_GRO | NETIF_F_RXCSUM | \
+	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
+
+	/* NETIF_F_GSO_ENCAP_ALL NETIF_F_GSO_SOFTWARE Newly added */
+
+#define IPVLAN_STATE_MASK \
+	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
+
+static int ipvlan_init(struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+	struct ipvl_port *port;
+	int err;
+
+	dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
+		     (phy_dev->state & IPVLAN_STATE_MASK);
+	dev->features = phy_dev->features & IPVLAN_FEATURES;
+	dev->features |= IPVLAN_ALWAYS_ON;
+	dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES;
+	dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS;
+	dev->hw_enc_features |= dev->features;
+	netif_inherit_tso_max(dev, phy_dev);
+	dev->hard_header_len = phy_dev->hard_header_len;
+
+	netdev_lockdep_set_classes(dev);
+
+	ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
+	if (!ipvlan->pcpu_stats)
+		return -ENOMEM;
+
+	if (!netif_is_ipvlan_port(phy_dev)) {
+		err = ipvlan_port_create(phy_dev);
+		if (err < 0) {
+			free_percpu(ipvlan->pcpu_stats);
+			return err;
+		}
+	}
+	port = ipvlan_port_get_rtnl(phy_dev);
+	port->count += 1;
+	return 0;
+}
+
+static void ipvlan_uninit(struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+	struct ipvl_port *port;
+
+	free_percpu(ipvlan->pcpu_stats);
+
+	port = ipvlan_port_get_rtnl(phy_dev);
+	port->count -= 1;
+	if (!port->count)
+		ipvlan_port_destroy(port->dev);
+}
+
+static int ipvlan_open(struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_addr *addr;
+
+	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
+	    ipvlan->port->mode == IPVLAN_MODE_L3S)
+		dev->flags |= IFF_NOARP;
+	else
+		dev->flags &= ~IFF_NOARP;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
+		ipvlan_ht_addr_add(ipvlan, addr);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int ipvlan_stop(struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+	struct ipvl_addr *addr;
+
+	dev_uc_unsync(phy_dev, dev);
+	dev_mc_unsync(phy_dev, dev);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
+		ipvlan_ht_addr_del(addr);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+	int skblen = skb->len;
+	int ret;
+
+	ret = ipvlan_queue_xmit(skb, dev);
+	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
+		struct ipvl_pcpu_stats *pcptr;
+
+		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
+
+		u64_stats_update_begin(&pcptr->syncp);
+		u64_stats_inc(&pcptr->tx_pkts);
+		u64_stats_add(&pcptr->tx_bytes, skblen);
+		u64_stats_update_end(&pcptr->syncp);
+	} else {
+		this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
+	}
+	return ret;
+}
+
+static netdev_features_t ipvlan_fix_features(struct net_device *dev,
+					     netdev_features_t features)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	features |= NETIF_F_ALL_FOR_ALL;
+	features &= (ipvlan->sfeatures | ~IPVLAN_FEATURES);
+	features = netdev_increment_features(ipvlan->phy_dev->features,
+					     features, features);
+	features |= IPVLAN_ALWAYS_ON;
+	features &= (IPVLAN_FEATURES | IPVLAN_ALWAYS_ON);
+
+	return features;
+}
+
+static void ipvlan_change_rx_flags(struct net_device *dev, int change)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+
+	if (change & IFF_ALLMULTI)
+		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
+}
+
+static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
+	} else {
+		struct netdev_hw_addr *ha;
+		DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);
+
+		bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
+		netdev_for_each_mc_addr(ha, dev)
+			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
+
+		/* Turn-on broadcast bit irrespective of address family,
+		 * since broadcast is deferred to a work-queue, hence no
+		 * impact on fast-path processing.
+		 */
+		__set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
+
+		bitmap_copy(ipvlan->mac_filters, mc_filters,
+			    IPVLAN_MAC_FILTER_SIZE);
+	}
+	dev_uc_sync(ipvlan->phy_dev, dev);
+	dev_mc_sync(ipvlan->phy_dev, dev);
+}
+
+static void ipvlan_get_stats64(struct net_device *dev,
+			       struct rtnl_link_stats64 *s)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (ipvlan->pcpu_stats) {
+		struct ipvl_pcpu_stats *pcptr;
+		u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
+		u32 rx_errs = 0, tx_drps = 0;
+		u32 strt;
+		int idx;
+
+		for_each_possible_cpu(idx) {
+			pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
+			do {
+				strt = u64_stats_fetch_begin(&pcptr->syncp);
+				rx_pkts = u64_stats_read(&pcptr->rx_pkts);
+				rx_bytes = u64_stats_read(&pcptr->rx_bytes);
+				rx_mcast = u64_stats_read(&pcptr->rx_mcast);
+				tx_pkts = u64_stats_read(&pcptr->tx_pkts);
+				tx_bytes = u64_stats_read(&pcptr->tx_bytes);
+			} while (u64_stats_fetch_retry(&pcptr->syncp,
+							   strt));
+
+			s->rx_packets += rx_pkts;
+			s->rx_bytes += rx_bytes;
+			s->multicast += rx_mcast;
+			s->tx_packets += tx_pkts;
+			s->tx_bytes += tx_bytes;
+
+			/* u32 values are updated without syncp protection. */
+			rx_errs += READ_ONCE(pcptr->rx_errs);
+			tx_drps += READ_ONCE(pcptr->tx_drps);
+		}
+		s->rx_errors = rx_errs;
+		s->rx_dropped = rx_errs;
+		s->tx_dropped = tx_drps;
+	}
+	s->tx_errors = DEV_STATS_READ(dev, tx_errors);
+}
+
+static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+
+	return vlan_vid_add(phy_dev, proto, vid);
+}
+
+static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
+				   u16 vid)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+
+	vlan_vid_del(phy_dev, proto, vid);
+	return 0;
+}
+
+static int ipvlan_get_iflink(const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	return ipvlan->phy_dev->ifindex;
+}
+
+static const struct net_device_ops ipvlan_netdev_ops = {
+	.ndo_init		= ipvlan_init,
+	.ndo_uninit		= ipvlan_uninit,
+	.ndo_open		= ipvlan_open,
+	.ndo_stop		= ipvlan_stop,
+	.ndo_start_xmit		= ipvlan_start_xmit,
+	.ndo_fix_features	= ipvlan_fix_features,
+	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
+	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
+	.ndo_get_stats64	= ipvlan_get_stats64,
+	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= ipvlan_vlan_rx_kill_vid,
+	.ndo_get_iflink		= ipvlan_get_iflink,
+};
+
+static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
+			      unsigned short type, const void *daddr,
+			      const void *saddr, unsigned len)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct net_device *phy_dev = ipvlan->phy_dev;
+
+	/* TODO Probably use a different field than dev_addr so that the
+	 * mac-address on the virtual device is portable and can be carried
+	 * while the packets use the mac-addr on the physical device.
+	 */
+	return dev_hard_header(skb, phy_dev, type, daddr,
+			       saddr ? : phy_dev->dev_addr, len);
+}
+
+static const struct header_ops ipvlan_header_ops = {
+	.create  	= ipvlan_hard_header,
+	.parse		= eth_header_parse,
+	.cache		= eth_header_cache,
+	.cache_update	= eth_header_cache_update,
+};
+
+static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
+{
+	ipvlan->dev->mtu = dev->mtu;
+}
+
+static bool netif_is_ipvlan(const struct net_device *dev)
+{
+	/* both ipvlan and ipvtap devices use the same netdev_ops */
+	return dev->netdev_ops == &ipvlan_netdev_ops;
+}
+
+static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
+					     struct ethtool_link_ksettings *cmd)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
+}
+
+static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
+				       struct ethtool_drvinfo *drvinfo)
+{
+	strscpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
+	strscpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
+}
+
+static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
+{
+	const struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	return ipvlan->msg_enable;
+}
+
+static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	ipvlan->msg_enable = value;
+}
+
+static const struct ethtool_ops ipvlan_ethtool_ops = {
+	.get_link	= ethtool_op_get_link,
+	.get_link_ksettings	= ipvlan_ethtool_get_link_ksettings,
+	.get_drvinfo	= ipvlan_ethtool_get_drvinfo,
+	.get_msglevel	= ipvlan_ethtool_get_msglevel,
+	.set_msglevel	= ipvlan_ethtool_set_msglevel,
+};
+
+static int ipvlan_nl_changelink(struct net_device *dev,
+				struct nlattr *tb[], struct nlattr *data[],
+				struct netlink_ext_ack *extack)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+	int err = 0;
+
+	if (!data)
+		return 0;
+	if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (data[IFLA_IPVLAN_MODE]) {
+		u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
+
+		err = ipvlan_set_port_mode(port, nmode, extack);
+	}
+
+	if (!err && data[IFLA_IPVLAN_FLAGS]) {
+		u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
+
+		if (flags & IPVLAN_F_PRIVATE)
+			ipvlan_mark_private(port);
+		else
+			ipvlan_clear_private(port);
+
+		if (flags & IPVLAN_F_VEPA)
+			ipvlan_mark_vepa(port);
+		else
+			ipvlan_clear_vepa(port);
+	}
+
+	return err;
+}
+
+static size_t ipvlan_nl_getsize(const struct net_device *dev)
+{
+	return (0
+		+ nla_total_size(2) /* IFLA_IPVLAN_MODE */
+		+ nla_total_size(2) /* IFLA_IPVLAN_FLAGS */
+		);
+}
+
+static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[],
+			      struct netlink_ext_ack *extack)
+{
+	if (!data)
+		return 0;
+
+	if (data[IFLA_IPVLAN_MODE]) {
+		u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
+
+		if (mode >= IPVLAN_MODE_MAX)
+			return -EINVAL;
+	}
+	if (data[IFLA_IPVLAN_FLAGS]) {
+		u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
+
+		/* Only two bits are used at this moment. */
+		if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
+			return -EINVAL;
+		/* Also both flags can't be active at the same time. */
+		if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) ==
+		    (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ipvlan_nl_fillinfo(struct sk_buff *skb,
+			      const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+	int ret = -EINVAL;
+
+	if (!port)
+		goto err;
+
+	ret = -EMSGSIZE;
+	if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
+		goto err;
+	if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags))
+		goto err;
+
+	return 0;
+
+err:
+	return ret;
+}
+
+int ipvlan_link_new(struct net *src_net, struct net_device *dev,
+		    struct nlattr *tb[], struct nlattr *data[],
+		    struct netlink_ext_ack *extack)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_port *port;
+	struct net_device *phy_dev;
+	int err;
+	u16 mode = IPVLAN_MODE_L3;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+	if (!phy_dev)
+		return -ENODEV;
+
+	if (netif_is_ipvlan(phy_dev)) {
+		struct ipvl_dev *tmp = netdev_priv(phy_dev);
+
+		phy_dev = tmp->phy_dev;
+		if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
+			return -EPERM;
+	} else if (!netif_is_ipvlan_port(phy_dev)) {
+		/* Exit early if the underlying link is invalid or busy */
+		if (phy_dev->type != ARPHRD_ETHER ||
+		    phy_dev->flags & IFF_LOOPBACK) {
+			netdev_err(phy_dev,
+				   "Master is either lo or non-ether device\n");
+			return -EINVAL;
+		}
+
+		if (netdev_is_rx_handler_busy(phy_dev)) {
+			netdev_err(phy_dev, "Device is already in use.\n");
+			return -EBUSY;
+		}
+	}
+
+	ipvlan->phy_dev = phy_dev;
+	ipvlan->dev = dev;
+	ipvlan->sfeatures = IPVLAN_FEATURES;
+	if (!tb[IFLA_MTU])
+		ipvlan_adjust_mtu(ipvlan, phy_dev);
+	INIT_LIST_HEAD(&ipvlan->addrs);
+	spin_lock_init(&ipvlan->addrs_lock);
+
+	/* TODO Probably put random address here to be presented to the
+	 * world but keep using the physical-dev address for the outgoing
+	 * packets.
+	 */
+	eth_hw_addr_set(dev, phy_dev->dev_addr);
+
+	dev->priv_flags |= IFF_NO_RX_HANDLER;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		return err;
+
+	/* ipvlan_init() would have created the port, if required */
+	port = ipvlan_port_get_rtnl(phy_dev);
+	ipvlan->port = port;
+
+	/* If the port-id base is at the MAX value, then wrap it around and
+	 * begin from 0x1 again. This may be due to a busy system where lots
+	 * of slaves are getting created and deleted.
+	 */
+	if (port->dev_id_start == 0xFFFE)
+		port->dev_id_start = 0x1;
+
+	/* Since L2 address is shared among all IPvlan slaves including
+	 * master, use unique 16 bit dev-ids to diffentiate among them.
+	 * Assign IDs between 0x1 and 0xFFFE (used by the master) to each
+	 * slave link [see addrconf_ifid_eui48()].
+	 */
+	err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE,
+			     GFP_KERNEL);
+	if (err < 0)
+		err = ida_simple_get(&port->ida, 0x1, port->dev_id_start,
+				     GFP_KERNEL);
+	if (err < 0)
+		goto unregister_netdev;
+	dev->dev_id = err;
+
+	/* Increment id-base to the next slot for the future assignment */
+	port->dev_id_start = err + 1;
+
+	err = netdev_upper_dev_link(phy_dev, dev, extack);
+	if (err)
+		goto remove_ida;
+
+	/* Flags are per port and latest update overrides. User has
+	 * to be consistent in setting it just like the mode attribute.
+	 */
+	if (data && data[IFLA_IPVLAN_FLAGS])
+		port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
+
+	if (data && data[IFLA_IPVLAN_MODE])
+		mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
+
+	err = ipvlan_set_port_mode(port, mode, extack);
+	if (err)
+		goto unlink_netdev;
+
+	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
+	netif_stacked_transfer_operstate(phy_dev, dev);
+	return 0;
+
+unlink_netdev:
+	netdev_upper_dev_unlink(phy_dev, dev);
+remove_ida:
+	ida_simple_remove(&port->ida, dev->dev_id);
+unregister_netdev:
+	unregister_netdevice(dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ipvlan_link_new);
+
+void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_addr *addr, *next;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
+		ipvlan_ht_addr_del(addr);
+		list_del_rcu(&addr->anode);
+		kfree_rcu(addr, rcu);
+	}
+	spin_unlock_bh(&ipvlan->addrs_lock);
+
+	ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
+	list_del_rcu(&ipvlan->pnode);
+	unregister_netdevice_queue(dev, head);
+	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
+}
+EXPORT_SYMBOL_GPL(ipvlan_link_delete);
+
+void ipvlan_link_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->max_mtu = ETH_MAX_MTU;
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
+	dev->netdev_ops = &ipvlan_netdev_ops;
+	dev->needs_free_netdev = true;
+	dev->header_ops = &ipvlan_header_ops;
+	dev->ethtool_ops = &ipvlan_ethtool_ops;
+}
+EXPORT_SYMBOL_GPL(ipvlan_link_setup);
+
+static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
+{
+	[IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
+	[IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 },
+};
+
+static struct net *ipvlan_get_link_net(const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	return dev_net(ipvlan->phy_dev);
+}
+
+static struct rtnl_link_ops ipvlan_link_ops = {
+	.kind		= "ipvlan",
+	.priv_size	= sizeof(struct ipvl_dev),
+
+	.setup		= ipvlan_link_setup,
+	.newlink	= ipvlan_link_new,
+	.dellink	= ipvlan_link_delete,
+	.get_link_net   = ipvlan_get_link_net,
+};
+
+int ipvlan_link_register(struct rtnl_link_ops *ops)
+{
+	ops->get_size	= ipvlan_nl_getsize;
+	ops->policy	= ipvlan_nl_policy;
+	ops->validate	= ipvlan_nl_validate;
+	ops->fill_info	= ipvlan_nl_fillinfo;
+	ops->changelink = ipvlan_nl_changelink;
+	ops->maxtype	= IFLA_IPVLAN_MAX;
+	return rtnl_link_register(ops);
+}
+EXPORT_SYMBOL_GPL(ipvlan_link_register);
+
+static int ipvlan_device_event(struct notifier_block *unused,
+			       unsigned long event, void *ptr)
+{
+	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+	struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ipvl_dev *ipvlan, *next;
+	struct ipvl_port *port;
+	LIST_HEAD(lst_kill);
+	int err;
+
+	if (!netif_is_ipvlan_port(dev))
+		return NOTIFY_DONE;
+
+	port = ipvlan_port_get_rtnl(dev);
+
+	switch (event) {
+	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+			netif_stacked_transfer_operstate(ipvlan->phy_dev,
+							 ipvlan->dev);
+		break;
+
+	case NETDEV_REGISTER: {
+		struct net *oldnet, *newnet = dev_net(dev);
+
+		oldnet = read_pnet(&port->pnet);
+		if (net_eq(newnet, oldnet))
+			break;
+
+		write_pnet(&port->pnet, newnet);
+
+		if (port->mode == IPVLAN_MODE_L3S)
+			ipvlan_migrate_l3s_hook(oldnet, newnet);
+		break;
+	}
+	case NETDEV_UNREGISTER:
+		if (dev->reg_state != NETREG_UNREGISTERING)
+			break;
+
+		list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
+			ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
+							    &lst_kill);
+		unregister_netdevice_many(&lst_kill);
+		break;
+
+	case NETDEV_FEAT_CHANGE:
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+			netif_inherit_tso_max(ipvlan->dev, dev);
+			netdev_update_features(ipvlan->dev);
+		}
+		break;
+
+	case NETDEV_CHANGEMTU:
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+			ipvlan_adjust_mtu(ipvlan, dev);
+		break;
+
+	case NETDEV_PRE_CHANGEADDR:
+		prechaddr_info = ptr;
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+			err = dev_pre_changeaddr_notify(ipvlan->dev,
+						    prechaddr_info->dev_addr,
+						    extack);
+			if (err)
+				return notifier_from_errno(err);
+		}
+		break;
+
+	case NETDEV_CHANGEADDR:
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+			eth_hw_addr_set(ipvlan->dev, dev->dev_addr);
+			call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
+		}
+		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlying device to change its type. */
+		return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+/* the caller must held the addrs lock */
+static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+{
+	struct ipvl_addr *addr;
+
+	addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
+	if (!addr)
+		return -ENOMEM;
+
+	addr->master = ipvlan;
+	if (!is_v6) {
+		memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
+		addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+		addr->atype = IPVL_IPV6;
+#endif
+	}
+
+	list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
+
+	/* If the interface is not up, the address will be added to the hash
+	 * list by ipvlan_open.
+	 */
+	if (netif_running(ipvlan->dev))
+		ipvlan_ht_addr_add(ipvlan, addr);
+
+	return 0;
+}
+
+static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+{
+	struct ipvl_addr *addr;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
+	if (!addr) {
+		spin_unlock_bh(&ipvlan->addrs_lock);
+		return;
+	}
+
+	ipvlan_ht_addr_del(addr);
+	list_del_rcu(&addr->anode);
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	kfree_rcu(addr, rcu);
+}
+
+static bool ipvlan_is_valid_dev(const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!netif_is_ipvlan(dev))
+		return false;
+
+	if (!ipvlan || !ipvlan->port)
+		return false;
+
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+	int ret = -EINVAL;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
+		netif_err(ipvlan, ifup, ipvlan->dev,
+			  "Failed to add IPv6=%pI6c addr for %s intf\n",
+			  ip6_addr, ipvlan->dev->name);
+	else
+		ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	return ret;
+}
+
+static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+{
+	return ipvlan_del_addr(ipvlan, ip6_addr, true);
+}
+
+static int ipvlan_addr6_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
+	struct net_device *dev = (struct net_device *)if6->idev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!ipvlan_is_valid_dev(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_add_addr6(ipvlan, &if6->addr))
+			return NOTIFY_BAD;
+		break;
+
+	case NETDEV_DOWN:
+		ipvlan_del_addr6(ipvlan, &if6->addr);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int ipvlan_addr6_validator_event(struct notifier_block *unused,
+					unsigned long event, void *ptr)
+{
+	struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
+	struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!ipvlan_is_valid_dev(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
+			NL_SET_ERR_MSG(i6vi->extack,
+				       "Address already assigned to an ipvlan device");
+			return notifier_from_errno(-EADDRINUSE);
+		}
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+#endif
+
+static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
+{
+	int ret = -EINVAL;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
+		netif_err(ipvlan, ifup, ipvlan->dev,
+			  "Failed to add IPv4=%pI4 on %s intf.\n",
+			  ip4_addr, ipvlan->dev->name);
+	else
+		ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	return ret;
+}
+
+static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
+{
+	return ipvlan_del_addr(ipvlan, ip4_addr, false);
+}
+
+static int ipvlan_addr4_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr)
+{
+	struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
+	struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct in_addr ip4_addr;
+
+	if (!ipvlan_is_valid_dev(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		ip4_addr.s_addr = if4->ifa_address;
+		if (ipvlan_add_addr4(ipvlan, &ip4_addr))
+			return NOTIFY_BAD;
+		break;
+
+	case NETDEV_DOWN:
+		ip4_addr.s_addr = if4->ifa_address;
+		ipvlan_del_addr4(ipvlan, &ip4_addr);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int ipvlan_addr4_validator_event(struct notifier_block *unused,
+					unsigned long event, void *ptr)
+{
+	struct in_validator_info *ivi = (struct in_validator_info *)ptr;
+	struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!ipvlan_is_valid_dev(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
+			NL_SET_ERR_MSG(ivi->extack,
+				       "Address already assigned to an ipvlan device");
+			return notifier_from_errno(-EADDRINUSE);
+		}
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr4_event,
+};
+
+static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr4_validator_event,
+};
+
+static struct notifier_block ipvlan_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_device_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr6_event,
+};
+
+static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
+	.notifier_call = ipvlan_addr6_validator_event,
+};
+#endif
+
+static int __init ipvlan_init_module(void)
+{
+	int err;
+
+	ipvlan_init_secret();
+	register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
+	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	register_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
+#endif
+	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
+
+	err = ipvlan_l3s_init();
+	if (err < 0)
+		goto error;
+
+	err = ipvlan_link_register(&ipvlan_link_ops);
+	if (err < 0) {
+		ipvlan_l3s_cleanup();
+		goto error;
+	}
+
+	return 0;
+error:
+	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	unregister_inetaddr_validator_notifier(
+	    &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
+	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	unregister_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
+#endif
+	unregister_netdevice_notifier(&ipvlan_notifier_block);
+	return err;
+}
+
+static void __exit ipvlan_cleanup_module(void)
+{
+	rtnl_link_unregister(&ipvlan_link_ops);
+	ipvlan_l3s_cleanup();
+	unregister_netdevice_notifier(&ipvlan_notifier_block);
+	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
+	unregister_inetaddr_validator_notifier(
+	    &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
+	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
+	unregister_inet6addr_validator_notifier(
+	    &ipvlan_addr6_vtor_notifier_block);
+#endif
+}
+
+module_init(ipvlan_init_module);
+module_exit(ipvlan_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
+MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
+MODULE_ALIAS_RTNL_LINK("ipvlan");
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
new file mode 100644
index 0000000000..60944a4bea
--- /dev/null
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/etherdevice.h>
+#include "ipvlan.h"
+#include <linux/if_vlan.h>
+#include <linux/if_tap.h>
+#include <linux/interrupt.h>
+#include <linux/nsproxy.h>
+#include <linux/compat.h>
+#include <linux/if_tun.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/uio.h>
+
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
+#include <net/sock.h>
+#include <linux/virtio_net.h>
+
+#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
+		      NETIF_F_TSO6)
+
+static dev_t ipvtap_major;
+static struct cdev ipvtap_cdev;
+
+static const void *ipvtap_net_namespace(const struct device *d)
+{
+	const struct net_device *dev = to_net_dev(d->parent);
+	return dev_net(dev);
+}
+
+static struct class ipvtap_class = {
+	 .name = "ipvtap",
+	 .ns_type = &net_ns_type_operations,
+	 .namespace = ipvtap_net_namespace,
+};
+
+struct ipvtap_dev {
+	struct ipvl_dev vlan;
+	struct tap_dev	  tap;
+};
+
+static void ipvtap_count_tx_dropped(struct tap_dev *tap)
+{
+	struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
+	struct ipvl_dev *vlan = &vlantap->vlan;
+
+	this_cpu_inc(vlan->pcpu_stats->tx_drps);
+}
+
+static void ipvtap_count_rx_dropped(struct tap_dev *tap)
+{
+	struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
+	struct ipvl_dev *vlan = &vlantap->vlan;
+
+	ipvlan_count_rx(vlan, 0, 0, 0);
+}
+
+static void ipvtap_update_features(struct tap_dev *tap,
+				   netdev_features_t features)
+{
+	struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
+	struct ipvl_dev *vlan = &vlantap->vlan;
+
+	vlan->sfeatures = features;
+	netdev_update_features(vlan->dev);
+}
+
+static int ipvtap_newlink(struct net *src_net, struct net_device *dev,
+			  struct nlattr *tb[], struct nlattr *data[],
+			  struct netlink_ext_ack *extack)
+{
+	struct ipvtap_dev *vlantap = netdev_priv(dev);
+	int err;
+
+	INIT_LIST_HEAD(&vlantap->tap.queue_list);
+
+	/* Since macvlan supports all offloads by default, make
+	 * tap support all offloads also.
+	 */
+	vlantap->tap.tap_features = TUN_OFFLOADS;
+	vlantap->tap.count_tx_dropped = ipvtap_count_tx_dropped;
+	vlantap->tap.update_features =	ipvtap_update_features;
+	vlantap->tap.count_rx_dropped = ipvtap_count_rx_dropped;
+
+	err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap);
+	if (err)
+		return err;
+
+	/* Don't put anything that may fail after macvlan_common_newlink
+	 * because we can't undo what it does.
+	 */
+	err =  ipvlan_link_new(src_net, dev, tb, data, extack);
+	if (err) {
+		netdev_rx_handler_unregister(dev);
+		return err;
+	}
+
+	vlantap->tap.dev = vlantap->vlan.dev;
+
+	return err;
+}
+
+static void ipvtap_dellink(struct net_device *dev,
+			   struct list_head *head)
+{
+	struct ipvtap_dev *vlan = netdev_priv(dev);
+
+	netdev_rx_handler_unregister(dev);
+	tap_del_queues(&vlan->tap);
+	ipvlan_link_delete(dev, head);
+}
+
+static void ipvtap_setup(struct net_device *dev)
+{
+	ipvlan_link_setup(dev);
+	dev->tx_queue_len = TUN_READQ_SIZE;
+	dev->priv_flags &= ~IFF_NO_QUEUE;
+}
+
+static struct rtnl_link_ops ipvtap_link_ops __read_mostly = {
+	.kind		= "ipvtap",
+	.setup		= ipvtap_setup,
+	.newlink	= ipvtap_newlink,
+	.dellink	= ipvtap_dellink,
+	.priv_size	= sizeof(struct ipvtap_dev),
+};
+
+static int ipvtap_device_event(struct notifier_block *unused,
+			       unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ipvtap_dev *vlantap;
+	struct device *classdev;
+	dev_t devt;
+	int err;
+	char tap_name[IFNAMSIZ];
+
+	if (dev->rtnl_link_ops != &ipvtap_link_ops)
+		return NOTIFY_DONE;
+
+	snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex);
+	vlantap = netdev_priv(dev);
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		/* Create the device node here after the network device has
+		 * been registered but before register_netdevice has
+		 * finished running.
+		 */
+		err = tap_get_minor(ipvtap_major, &vlantap->tap);
+		if (err)
+			return notifier_from_errno(err);
+
+		devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
+		classdev = device_create(&ipvtap_class, &dev->dev, devt,
+					 dev, "%s", tap_name);
+		if (IS_ERR(classdev)) {
+			tap_free_minor(ipvtap_major, &vlantap->tap);
+			return notifier_from_errno(PTR_ERR(classdev));
+		}
+		err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj,
+					tap_name);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+	case NETDEV_UNREGISTER:
+		/* vlan->minor == 0 if NETDEV_REGISTER above failed */
+		if (vlantap->tap.minor == 0)
+			break;
+		sysfs_remove_link(&dev->dev.kobj, tap_name);
+		devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
+		device_destroy(&ipvtap_class, devt);
+		tap_free_minor(ipvtap_major, &vlantap->tap);
+		break;
+	case NETDEV_CHANGE_TX_QUEUE_LEN:
+		if (tap_queue_resize(&vlantap->tap))
+			return NOTIFY_BAD;
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipvtap_notifier_block __read_mostly = {
+	.notifier_call	= ipvtap_device_event,
+};
+
+static int __init ipvtap_init(void)
+{
+	int err;
+
+	err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap",
+			      THIS_MODULE);
+	if (err)
+		goto out1;
+
+	err = class_register(&ipvtap_class);
+	if (err)
+		goto out2;
+
+	err = register_netdevice_notifier(&ipvtap_notifier_block);
+	if (err)
+		goto out3;
+
+	err = ipvlan_link_register(&ipvtap_link_ops);
+	if (err)
+		goto out4;
+
+	return 0;
+
+out4:
+	unregister_netdevice_notifier(&ipvtap_notifier_block);
+out3:
+	class_unregister(&ipvtap_class);
+out2:
+	tap_destroy_cdev(ipvtap_major, &ipvtap_cdev);
+out1:
+	return err;
+}
+module_init(ipvtap_init);
+
+static void __exit ipvtap_exit(void)
+{
+	rtnl_link_unregister(&ipvtap_link_ops);
+	unregister_netdevice_notifier(&ipvtap_notifier_block);
+	class_unregister(&ipvtap_class);
+	tap_destroy_cdev(ipvtap_major, &ipvtap_cdev);
+}
+module_exit(ipvtap_exit);
+MODULE_ALIAS_RTNL_LINK("ipvtap");
+MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>");
+MODULE_LICENSE("GPL");
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ipvlan
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip