diff options
Diffstat (limited to 'drivers/net/amt.c')
-rw-r--r-- | drivers/net/amt.c | 3453 |
1 files changed, 3453 insertions, 0 deletions
diff --git a/drivers/net/amt.c b/drivers/net/amt.c new file mode 100644 index 0000000000..ddd087c2c3 --- /dev/null +++ b/drivers/net/amt.c @@ -0,0 +1,3453 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com> */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/udp.h> +#include <linux/jhash.h> +#include <linux/if_tunnel.h> +#include <linux/net.h> +#include <linux/igmp.h> +#include <linux/workqueue.h> +#include <net/pkt_sched.h> +#include <net/net_namespace.h> +#include <net/ip.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> +#include <net/icmp.h> +#include <net/mld.h> +#include <net/amt.h> +#include <uapi/linux/amt.h> +#include <linux/security.h> +#include <net/gro_cells.h> +#include <net/ipv6.h> +#include <net/if_inet6.h> +#include <net/ndisc.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#include <net/inet_common.h> +#include <net/ip6_checksum.h> + +static struct workqueue_struct *amt_wq; + +static HLIST_HEAD(source_gc_list); +/* Lock for source_gc_list */ +static spinlock_t source_gc_lock; +static struct delayed_work source_gc_wq; +static char *status_str[] = { + "AMT_STATUS_INIT", + "AMT_STATUS_SENT_DISCOVERY", + "AMT_STATUS_RECEIVED_DISCOVERY", + "AMT_STATUS_SENT_ADVERTISEMENT", + "AMT_STATUS_RECEIVED_ADVERTISEMENT", + "AMT_STATUS_SENT_REQUEST", + "AMT_STATUS_RECEIVED_REQUEST", + "AMT_STATUS_SENT_QUERY", + "AMT_STATUS_RECEIVED_QUERY", + "AMT_STATUS_SENT_UPDATE", + "AMT_STATUS_RECEIVED_UPDATE", +}; + +static char *type_str[] = { + "", /* Type 0 is not defined */ + "AMT_MSG_DISCOVERY", + "AMT_MSG_ADVERTISEMENT", + "AMT_MSG_REQUEST", + "AMT_MSG_MEMBERSHIP_QUERY", + "AMT_MSG_MEMBERSHIP_UPDATE", + "AMT_MSG_MULTICAST_DATA", + "AMT_MSG_TEARDOWN", +}; + +static char *action_str[] = { + "AMT_ACT_GMI", + "AMT_ACT_GMI_ZERO", + "AMT_ACT_GT", + "AMT_ACT_STATUS_FWD_NEW", + "AMT_ACT_STATUS_D_FWD_NEW", + "AMT_ACT_STATUS_NONE_NEW", +}; + +static struct igmpv3_grec igmpv3_zero_grec; + +#if IS_ENABLED(CONFIG_IPV6) +#define MLD2_ALL_NODE_INIT { { { 0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } } +static struct in6_addr mld2_all_node = MLD2_ALL_NODE_INIT; +static struct mld2_grec mldv2_zero_grec; +#endif + +static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct amt_skb_cb) + sizeof(struct tc_skb_cb) > + sizeof_field(struct sk_buff, cb)); + + return (struct amt_skb_cb *)((void *)skb->cb + + sizeof(struct tc_skb_cb)); +} + +static void __amt_source_gc_work(void) +{ + struct amt_source_node *snode; + struct hlist_head gc_list; + struct hlist_node *t; + + spin_lock_bh(&source_gc_lock); + hlist_move_list(&source_gc_list, &gc_list); + spin_unlock_bh(&source_gc_lock); + + hlist_for_each_entry_safe(snode, t, &gc_list, node) { + hlist_del_rcu(&snode->node); + kfree_rcu(snode, rcu); + } +} + +static void amt_source_gc_work(struct work_struct *work) +{ + __amt_source_gc_work(); + + spin_lock_bh(&source_gc_lock); + mod_delayed_work(amt_wq, &source_gc_wq, + msecs_to_jiffies(AMT_GC_INTERVAL)); + spin_unlock_bh(&source_gc_lock); +} + +static bool amt_addr_equal(union amt_addr *a, union amt_addr *b) +{ + return !memcmp(a, b, sizeof(union amt_addr)); +} + +static u32 amt_source_hash(struct amt_tunnel_list *tunnel, union amt_addr *src) +{ + u32 hash = jhash(src, sizeof(*src), tunnel->amt->hash_seed); + + return reciprocal_scale(hash, tunnel->amt->hash_buckets); +} + +static bool amt_status_filter(struct amt_source_node *snode, + enum amt_filter filter) +{ + bool rc = false; + + switch (filter) { + case AMT_FILTER_FWD: + if (snode->status == AMT_SOURCE_STATUS_FWD && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_D_FWD: + if (snode->status == AMT_SOURCE_STATUS_D_FWD && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_FWD_NEW: + if (snode->status == AMT_SOURCE_STATUS_FWD && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_D_FWD_NEW: + if (snode->status == AMT_SOURCE_STATUS_D_FWD && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_ALL: + rc = true; + break; + case AMT_FILTER_NONE_NEW: + if (snode->status == AMT_SOURCE_STATUS_NONE && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + case AMT_FILTER_BOTH: + if ((snode->status == AMT_SOURCE_STATUS_D_FWD || + snode->status == AMT_SOURCE_STATUS_FWD) && + snode->flags == AMT_SOURCE_OLD) + rc = true; + break; + case AMT_FILTER_BOTH_NEW: + if ((snode->status == AMT_SOURCE_STATUS_D_FWD || + snode->status == AMT_SOURCE_STATUS_FWD) && + snode->flags == AMT_SOURCE_NEW) + rc = true; + break; + default: + WARN_ON_ONCE(1); + break; + } + + return rc; +} + +static struct amt_source_node *amt_lookup_src(struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + enum amt_filter filter, + union amt_addr *src) +{ + u32 hash = amt_source_hash(tunnel, src); + struct amt_source_node *snode; + + hlist_for_each_entry_rcu(snode, &gnode->sources[hash], node) + if (amt_status_filter(snode, filter) && + amt_addr_equal(&snode->source_addr, src)) + return snode; + + return NULL; +} + +static u32 amt_group_hash(struct amt_tunnel_list *tunnel, union amt_addr *group) +{ + u32 hash = jhash(group, sizeof(*group), tunnel->amt->hash_seed); + + return reciprocal_scale(hash, tunnel->amt->hash_buckets); +} + +static struct amt_group_node *amt_lookup_group(struct amt_tunnel_list *tunnel, + union amt_addr *group, + union amt_addr *host, + bool v6) +{ + u32 hash = amt_group_hash(tunnel, group); + struct amt_group_node *gnode; + + hlist_for_each_entry_rcu(gnode, &tunnel->groups[hash], node) { + if (amt_addr_equal(&gnode->group_addr, group) && + amt_addr_equal(&gnode->host_addr, host) && + gnode->v6 == v6) + return gnode; + } + + return NULL; +} + +static void amt_destroy_source(struct amt_source_node *snode) +{ + struct amt_group_node *gnode = snode->gnode; + struct amt_tunnel_list *tunnel; + + tunnel = gnode->tunnel_list; + + if (!gnode->v6) { + netdev_dbg(snode->gnode->amt->dev, + "Delete source %pI4 from %pI4\n", + &snode->source_addr.ip4, + &gnode->group_addr.ip4); +#if IS_ENABLED(CONFIG_IPV6) + } else { + netdev_dbg(snode->gnode->amt->dev, + "Delete source %pI6 from %pI6\n", + &snode->source_addr.ip6, + &gnode->group_addr.ip6); +#endif + } + + cancel_delayed_work(&snode->source_timer); + hlist_del_init_rcu(&snode->node); + tunnel->nr_sources--; + gnode->nr_sources--; + spin_lock_bh(&source_gc_lock); + hlist_add_head_rcu(&snode->node, &source_gc_list); + spin_unlock_bh(&source_gc_lock); +} + +static void amt_del_group(struct amt_dev *amt, struct amt_group_node *gnode) +{ + struct amt_source_node *snode; + struct hlist_node *t; + int i; + + if (cancel_delayed_work(&gnode->group_timer)) + dev_put(amt->dev); + hlist_del_rcu(&gnode->node); + gnode->tunnel_list->nr_groups--; + + if (!gnode->v6) + netdev_dbg(amt->dev, "Leave group %pI4\n", + &gnode->group_addr.ip4); +#if IS_ENABLED(CONFIG_IPV6) + else + netdev_dbg(amt->dev, "Leave group %pI6\n", + &gnode->group_addr.ip6); +#endif + for (i = 0; i < amt->hash_buckets; i++) + hlist_for_each_entry_safe(snode, t, &gnode->sources[i], node) + amt_destroy_source(snode); + + /* tunnel->lock was acquired outside of amt_del_group() + * But rcu_read_lock() was acquired too so It's safe. + */ + kfree_rcu(gnode, rcu); +} + +/* If a source timer expires with a router filter-mode for the group of + * INCLUDE, the router concludes that traffic from this particular + * source is no longer desired on the attached network, and deletes the + * associated source record. + */ +static void amt_source_work(struct work_struct *work) +{ + struct amt_source_node *snode = container_of(to_delayed_work(work), + struct amt_source_node, + source_timer); + struct amt_group_node *gnode = snode->gnode; + struct amt_dev *amt = gnode->amt; + struct amt_tunnel_list *tunnel; + + tunnel = gnode->tunnel_list; + spin_lock_bh(&tunnel->lock); + rcu_read_lock(); + if (gnode->filter_mode == MCAST_INCLUDE) { + amt_destroy_source(snode); + if (!gnode->nr_sources) + amt_del_group(amt, gnode); + } else { + /* When a router filter-mode for a group is EXCLUDE, + * source records are only deleted when the group timer expires + */ + snode->status = AMT_SOURCE_STATUS_D_FWD; + } + rcu_read_unlock(); + spin_unlock_bh(&tunnel->lock); +} + +static void amt_act_src(struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + struct amt_source_node *snode, + enum amt_act act) +{ + struct amt_dev *amt = tunnel->amt; + + switch (act) { + case AMT_ACT_GMI: + mod_delayed_work(amt_wq, &snode->source_timer, + msecs_to_jiffies(amt_gmi(amt))); + break; + case AMT_ACT_GMI_ZERO: + cancel_delayed_work(&snode->source_timer); + break; + case AMT_ACT_GT: + mod_delayed_work(amt_wq, &snode->source_timer, + gnode->group_timer.timer.expires); + break; + case AMT_ACT_STATUS_FWD_NEW: + snode->status = AMT_SOURCE_STATUS_FWD; + snode->flags = AMT_SOURCE_NEW; + break; + case AMT_ACT_STATUS_D_FWD_NEW: + snode->status = AMT_SOURCE_STATUS_D_FWD; + snode->flags = AMT_SOURCE_NEW; + break; + case AMT_ACT_STATUS_NONE_NEW: + cancel_delayed_work(&snode->source_timer); + snode->status = AMT_SOURCE_STATUS_NONE; + snode->flags = AMT_SOURCE_NEW; + break; + default: + WARN_ON_ONCE(1); + return; + } + + if (!gnode->v6) + netdev_dbg(amt->dev, "Source %pI4 from %pI4 Acted %s\n", + &snode->source_addr.ip4, + &gnode->group_addr.ip4, + action_str[act]); +#if IS_ENABLED(CONFIG_IPV6) + else + netdev_dbg(amt->dev, "Source %pI6 from %pI6 Acted %s\n", + &snode->source_addr.ip6, + &gnode->group_addr.ip6, + action_str[act]); +#endif +} + +static struct amt_source_node *amt_alloc_snode(struct amt_group_node *gnode, + union amt_addr *src) +{ + struct amt_source_node *snode; + + snode = kzalloc(sizeof(*snode), GFP_ATOMIC); + if (!snode) + return NULL; + + memcpy(&snode->source_addr, src, sizeof(union amt_addr)); + snode->gnode = gnode; + snode->status = AMT_SOURCE_STATUS_NONE; + snode->flags = AMT_SOURCE_NEW; + INIT_HLIST_NODE(&snode->node); + INIT_DELAYED_WORK(&snode->source_timer, amt_source_work); + + return snode; +} + +/* RFC 3810 - 7.2.2. Definition of Filter Timers + * + * Router Mode Filter Timer Actions/Comments + * ----------- ----------------- ---------------- + * + * INCLUDE Not Used All listeners in + * INCLUDE mode. + * + * EXCLUDE Timer > 0 At least one listener + * in EXCLUDE mode. + * + * EXCLUDE Timer == 0 No more listeners in + * EXCLUDE mode for the + * multicast address. + * If the Requested List + * is empty, delete + * Multicast Address + * Record. If not, switch + * to INCLUDE filter mode; + * the sources in the + * Requested List are + * moved to the Include + * List, and the Exclude + * List is deleted. + */ +static void amt_group_work(struct work_struct *work) +{ + struct amt_group_node *gnode = container_of(to_delayed_work(work), + struct amt_group_node, + group_timer); + struct amt_tunnel_list *tunnel = gnode->tunnel_list; + struct amt_dev *amt = gnode->amt; + struct amt_source_node *snode; + bool delete_group = true; + struct hlist_node *t; + int i, buckets; + + buckets = amt->hash_buckets; + + spin_lock_bh(&tunnel->lock); + if (gnode->filter_mode == MCAST_INCLUDE) { + /* Not Used */ + spin_unlock_bh(&tunnel->lock); + goto out; + } + + rcu_read_lock(); + for (i = 0; i < buckets; i++) { + hlist_for_each_entry_safe(snode, t, + &gnode->sources[i], node) { + if (!delayed_work_pending(&snode->source_timer) || + snode->status == AMT_SOURCE_STATUS_D_FWD) { + amt_destroy_source(snode); + } else { + delete_group = false; + snode->status = AMT_SOURCE_STATUS_FWD; + } + } + } + if (delete_group) + amt_del_group(amt, gnode); + else + gnode->filter_mode = MCAST_INCLUDE; + rcu_read_unlock(); + spin_unlock_bh(&tunnel->lock); +out: + dev_put(amt->dev); +} + +/* Non-existent group is created as INCLUDE {empty}: + * + * RFC 3376 - 5.1. Action on Change of Interface State + * + * If no interface state existed for that multicast address before + * the change (i.e., the change consisted of creating a new + * per-interface record), or if no state exists after the change + * (i.e., the change consisted of deleting a per-interface record), + * then the "non-existent" state is considered to have a filter mode + * of INCLUDE and an empty source list. + */ +static struct amt_group_node *amt_add_group(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + union amt_addr *group, + union amt_addr *host, + bool v6) +{ + struct amt_group_node *gnode; + u32 hash; + int i; + + if (tunnel->nr_groups >= amt->max_groups) + return ERR_PTR(-ENOSPC); + + gnode = kzalloc(sizeof(*gnode) + + (sizeof(struct hlist_head) * amt->hash_buckets), + GFP_ATOMIC); + if (unlikely(!gnode)) + return ERR_PTR(-ENOMEM); + + gnode->amt = amt; + gnode->group_addr = *group; + gnode->host_addr = *host; + gnode->v6 = v6; + gnode->tunnel_list = tunnel; + gnode->filter_mode = MCAST_INCLUDE; + INIT_HLIST_NODE(&gnode->node); + INIT_DELAYED_WORK(&gnode->group_timer, amt_group_work); + for (i = 0; i < amt->hash_buckets; i++) + INIT_HLIST_HEAD(&gnode->sources[i]); + + hash = amt_group_hash(tunnel, group); + hlist_add_head_rcu(&gnode->node, &tunnel->groups[hash]); + tunnel->nr_groups++; + + if (!gnode->v6) + netdev_dbg(amt->dev, "Join group %pI4\n", + &gnode->group_addr.ip4); +#if IS_ENABLED(CONFIG_IPV6) + else + netdev_dbg(amt->dev, "Join group %pI6\n", + &gnode->group_addr.ip6); +#endif + + return gnode; +} + +static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt) +{ + u8 ra[AMT_IPHDR_OPTS] = { IPOPT_RA, 4, 0, 0 }; + int hlen = LL_RESERVED_SPACE(amt->dev); + int tlen = amt->dev->needed_tailroom; + struct igmpv3_query *ihv3; + void *csum_start = NULL; + __sum16 *csum = NULL; + struct sk_buff *skb; + struct ethhdr *eth; + struct iphdr *iph; + unsigned int len; + int offset; + + len = hlen + tlen + sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3); + skb = netdev_alloc_skb_ip_align(amt->dev, len); + if (!skb) + return NULL; + + skb_reserve(skb, hlen); + skb_push(skb, sizeof(*eth)); + skb->protocol = htons(ETH_P_IP); + skb_reset_mac_header(skb); + skb->priority = TC_PRIO_CONTROL; + skb_put(skb, sizeof(*iph)); + skb_put_data(skb, ra, sizeof(ra)); + skb_put(skb, sizeof(*ihv3)); + skb_pull(skb, sizeof(*eth)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = (sizeof(struct iphdr) + AMT_IPHDR_OPTS) >> 2; + iph->tos = AMT_TOS; + iph->tot_len = htons(sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3)); + iph->frag_off = htons(IP_DF); + iph->ttl = 1; + iph->id = 0; + iph->protocol = IPPROTO_IGMP; + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); + iph->saddr = htonl(INADDR_ANY); + ip_send_check(iph); + + eth = eth_hdr(skb); + ether_addr_copy(eth->h_source, amt->dev->dev_addr); + ip_eth_mc_map(htonl(INADDR_ALLHOSTS_GROUP), eth->h_dest); + eth->h_proto = htons(ETH_P_IP); + + ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS); + skb_reset_transport_header(skb); + ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; + ihv3->code = 1; + ihv3->group = 0; + ihv3->qqic = amt->qi; + ihv3->nsrcs = 0; + ihv3->resv = 0; + ihv3->suppress = false; + ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); + ihv3->csum = 0; + csum = &ihv3->csum; + csum_start = (void *)ihv3; + *csum = ip_compute_csum(csum_start, sizeof(*ihv3)); + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + skb->ip_summed = CHECKSUM_NONE; + + skb_push(skb, sizeof(*eth) + sizeof(*iph) + AMT_IPHDR_OPTS); + + return skb; +} + +static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status, + bool validate) +{ + if (validate && amt->status >= status) + return; + netdev_dbg(amt->dev, "Update GW status %s -> %s", + status_str[amt->status], status_str[status]); + WRITE_ONCE(amt->status, status); +} + +static void __amt_update_relay_status(struct amt_tunnel_list *tunnel, + enum amt_status status, + bool validate) +{ + if (validate && tunnel->status >= status) + return; + netdev_dbg(tunnel->amt->dev, + "Update Tunnel(IP = %pI4, PORT = %u) status %s -> %s", + &tunnel->ip4, ntohs(tunnel->source_port), + status_str[tunnel->status], status_str[status]); + tunnel->status = status; +} + +static void amt_update_relay_status(struct amt_tunnel_list *tunnel, + enum amt_status status, bool validate) +{ + spin_lock_bh(&tunnel->lock); + __amt_update_relay_status(tunnel, status, validate); + spin_unlock_bh(&tunnel->lock); +} + +static void amt_send_discovery(struct amt_dev *amt) +{ + struct amt_header_discovery *amtd; + int hlen, tlen, offset; + struct socket *sock; + struct udphdr *udph; + struct sk_buff *skb; + struct iphdr *iph; + struct rtable *rt; + struct flowi4 fl4; + u32 len; + int err; + + rcu_read_lock(); + sock = rcu_dereference(amt->sock); + if (!sock) + goto out; + + if (!netif_running(amt->stream_dev) || !netif_running(amt->dev)) + goto out; + + rt = ip_route_output_ports(amt->net, &fl4, sock->sk, + amt->discovery_ip, amt->local_ip, + amt->gw_port, amt->relay_port, + IPPROTO_UDP, 0, + amt->stream_dev->ifindex); + if (IS_ERR(rt)) { + amt->dev->stats.tx_errors++; + goto out; + } + + hlen = LL_RESERVED_SPACE(amt->dev); + tlen = amt->dev->needed_tailroom; + len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtd); + skb = netdev_alloc_skb_ip_align(amt->dev, len); + if (!skb) { + ip_rt_put(rt); + amt->dev->stats.tx_errors++; + goto out; + } + + skb->priority = TC_PRIO_CONTROL; + skb_dst_set(skb, &rt->dst); + + len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtd); + skb_reset_network_header(skb); + skb_put(skb, len); + amtd = skb_pull(skb, sizeof(*iph) + sizeof(*udph)); + amtd->version = 0; + amtd->type = AMT_MSG_DISCOVERY; + amtd->reserved = 0; + amtd->nonce = amt->nonce; + skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + udph->source = amt->gw_port; + udph->dest = amt->relay_port; + udph->len = htons(sizeof(*udph) + sizeof(*amtd)); + udph->check = 0; + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + udph->check = csum_tcpudp_magic(amt->local_ip, amt->discovery_ip, + sizeof(*udph) + sizeof(*amtd), + IPPROTO_UDP, skb->csum); + + skb_push(skb, sizeof(*iph)); + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = (sizeof(struct iphdr)) >> 2; + iph->tos = AMT_TOS; + iph->frag_off = 0; + iph->ttl = ip4_dst_hoplimit(&rt->dst); + iph->daddr = amt->discovery_ip; + iph->saddr = amt->local_ip; + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(len); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(amt->net, skb, NULL); + ip_send_check(iph); + err = ip_local_out(amt->net, sock->sk, skb); + if (unlikely(net_xmit_eval(err))) + amt->dev->stats.tx_errors++; + + amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true); +out: + rcu_read_unlock(); +} + +static void amt_send_request(struct amt_dev *amt, bool v6) +{ + struct amt_header_request *amtrh; + int hlen, tlen, offset; + struct socket *sock; + struct udphdr *udph; + struct sk_buff *skb; + struct iphdr *iph; + struct rtable *rt; + struct flowi4 fl4; + u32 len; + int err; + + rcu_read_lock(); + sock = rcu_dereference(amt->sock); + if (!sock) + goto out; + + if (!netif_running(amt->stream_dev) || !netif_running(amt->dev)) + goto out; + + rt = ip_route_output_ports(amt->net, &fl4, sock->sk, + amt->remote_ip, amt->local_ip, + amt->gw_port, amt->relay_port, + IPPROTO_UDP, 0, + amt->stream_dev->ifindex); + if (IS_ERR(rt)) { + amt->dev->stats.tx_errors++; + goto out; + } + + hlen = LL_RESERVED_SPACE(amt->dev); + tlen = amt->dev->needed_tailroom; + len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh); + skb = netdev_alloc_skb_ip_align(amt->dev, len); + if (!skb) { + ip_rt_put(rt); + amt->dev->stats.tx_errors++; + goto out; + } + + skb->priority = TC_PRIO_CONTROL; + skb_dst_set(skb, &rt->dst); + + len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh); + skb_reset_network_header(skb); + skb_put(skb, len); + amtrh = skb_pull(skb, sizeof(*iph) + sizeof(*udph)); + amtrh->version = 0; + amtrh->type = AMT_MSG_REQUEST; + amtrh->reserved1 = 0; + amtrh->p = v6; + amtrh->reserved2 = 0; + amtrh->nonce = amt->nonce; + skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + udph->source = amt->gw_port; + udph->dest = amt->relay_port; + udph->len = htons(sizeof(*amtrh) + sizeof(*udph)); + udph->check = 0; + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + udph->check = csum_tcpudp_magic(amt->local_ip, amt->remote_ip, + sizeof(*udph) + sizeof(*amtrh), + IPPROTO_UDP, skb->csum); + + skb_push(skb, sizeof(*iph)); + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = (sizeof(struct iphdr)) >> 2; + iph->tos = AMT_TOS; + iph->frag_off = 0; + iph->ttl = ip4_dst_hoplimit(&rt->dst); + iph->daddr = amt->remote_ip; + iph->saddr = amt->local_ip; + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(len); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(amt->net, skb, NULL); + ip_send_check(iph); + err = ip_local_out(amt->net, sock->sk, skb); + if (unlikely(net_xmit_eval(err))) + amt->dev->stats.tx_errors++; + +out: + rcu_read_unlock(); +} + +static void amt_send_igmp_gq(struct amt_dev *amt, + struct amt_tunnel_list *tunnel) +{ + struct sk_buff *skb; + + skb = amt_build_igmp_gq(amt); + if (!skb) + return; + + amt_skb_cb(skb)->tunnel = tunnel; + dev_queue_xmit(skb); +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct sk_buff *amt_build_mld_gq(struct amt_dev *amt) +{ + u8 ra[AMT_IP6HDR_OPTS] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, + 2, 0, 0, IPV6_TLV_PAD1, IPV6_TLV_PAD1 }; + int hlen = LL_RESERVED_SPACE(amt->dev); + int tlen = amt->dev->needed_tailroom; + struct mld2_query *mld2q; + void *csum_start = NULL; + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *eth; + u32 len; + + len = hlen + tlen + sizeof(*ip6h) + sizeof(ra) + sizeof(*mld2q); + skb = netdev_alloc_skb_ip_align(amt->dev, len); + if (!skb) + return NULL; + + skb_reserve(skb, hlen); + skb_push(skb, sizeof(*eth)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->priority = TC_PRIO_CONTROL; + skb->protocol = htons(ETH_P_IPV6); + skb_put_zero(skb, sizeof(*ip6h)); + skb_put_data(skb, ra, sizeof(ra)); + skb_put_zero(skb, sizeof(*mld2q)); + skb_pull(skb, sizeof(*eth)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(sizeof(ra) + sizeof(*mld2q)); + ip6h->nexthdr = NEXTHDR_HOP; + ip6h->hop_limit = 1; + ip6h->daddr = mld2_all_node; + ip6_flow_hdr(ip6h, 0, 0); + + if (ipv6_dev_get_saddr(amt->net, amt->dev, &ip6h->daddr, 0, + &ip6h->saddr)) { + amt->dev->stats.tx_errors++; + kfree_skb(skb); + return NULL; + } + + eth->h_proto = htons(ETH_P_IPV6); + ether_addr_copy(eth->h_source, amt->dev->dev_addr); + ipv6_eth_mc_map(&mld2_all_node, eth->h_dest); + + skb_pull(skb, sizeof(*ip6h) + sizeof(ra)); + skb_reset_transport_header(skb); + mld2q = (struct mld2_query *)icmp6_hdr(skb); + mld2q->mld2q_mrc = htons(1); + mld2q->mld2q_type = ICMPV6_MGM_QUERY; + mld2q->mld2q_code = 0; + mld2q->mld2q_cksum = 0; + mld2q->mld2q_resv1 = 0; + mld2q->mld2q_resv2 = 0; + mld2q->mld2q_suppress = 0; + mld2q->mld2q_qrv = amt->qrv; + mld2q->mld2q_nsrcs = 0; + mld2q->mld2q_qqic = amt->qi; + csum_start = (void *)mld2q; + mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mld2q), + IPPROTO_ICMPV6, + csum_partial(csum_start, + sizeof(*mld2q), 0)); + + skb->ip_summed = CHECKSUM_NONE; + skb_push(skb, sizeof(*eth) + sizeof(*ip6h) + sizeof(ra)); + return skb; +} + +static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel) +{ + struct sk_buff *skb; + + skb = amt_build_mld_gq(amt); + if (!skb) + return; + + amt_skb_cb(skb)->tunnel = tunnel; + dev_queue_xmit(skb); +} +#else +static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel) +{ +} +#endif + +static bool amt_queue_event(struct amt_dev *amt, enum amt_event event, + struct sk_buff *skb) +{ + int index; + + spin_lock_bh(&amt->lock); + if (amt->nr_events >= AMT_MAX_EVENTS) { + spin_unlock_bh(&amt->lock); + return 1; + } + + index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS; + amt->events[index].event = event; + amt->events[index].skb = skb; + amt->nr_events++; + amt->event_idx %= AMT_MAX_EVENTS; + queue_work(amt_wq, &amt->event_wq); + spin_unlock_bh(&amt->lock); + + return 0; +} + +static void amt_secret_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(to_delayed_work(work), + struct amt_dev, + secret_wq); + + spin_lock_bh(&amt->lock); + get_random_bytes(&amt->key, sizeof(siphash_key_t)); + spin_unlock_bh(&amt->lock); + mod_delayed_work(amt_wq, &amt->secret_wq, + msecs_to_jiffies(AMT_SECRET_TIMEOUT)); +} + +static void amt_event_send_discovery(struct amt_dev *amt) +{ + if (amt->status > AMT_STATUS_SENT_DISCOVERY) + goto out; + get_random_bytes(&amt->nonce, sizeof(__be32)); + + amt_send_discovery(amt); +out: + mod_delayed_work(amt_wq, &amt->discovery_wq, + msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); +} + +static void amt_discovery_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(to_delayed_work(work), + struct amt_dev, + discovery_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL)) + mod_delayed_work(amt_wq, &amt->discovery_wq, + msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); +} + +static void amt_event_send_request(struct amt_dev *amt) +{ + u32 exp; + + if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT) + goto out; + + if (amt->req_cnt > AMT_MAX_REQ_COUNT) { + netdev_dbg(amt->dev, "Gateway is not ready"); + amt->qi = AMT_INIT_REQ_TIMEOUT; + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); + amt->remote_ip = 0; + amt_update_gw_status(amt, AMT_STATUS_INIT, false); + amt->req_cnt = 0; + amt->nonce = 0; + goto out; + } + + if (!amt->req_cnt) { + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); + get_random_bytes(&amt->nonce, sizeof(__be32)); + } + + amt_send_request(amt, false); + amt_send_request(amt, true); + amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true); + amt->req_cnt++; +out: + exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT); + mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000)); +} + +static void amt_req_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(to_delayed_work(work), + struct amt_dev, + req_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL)) + mod_delayed_work(amt_wq, &amt->req_wq, + msecs_to_jiffies(100)); +} + +static bool amt_send_membership_update(struct amt_dev *amt, + struct sk_buff *skb, + bool v6) +{ + struct amt_header_membership_update *amtmu; + struct socket *sock; + struct iphdr *iph; + struct flowi4 fl4; + struct rtable *rt; + int err; + + sock = rcu_dereference_bh(amt->sock); + if (!sock) + return true; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(amt->dev) + sizeof(*amtmu) + + sizeof(*iph) + sizeof(struct udphdr)); + if (err) + return true; + + skb_reset_inner_headers(skb); + memset(&fl4, 0, sizeof(struct flowi4)); + fl4.flowi4_oif = amt->stream_dev->ifindex; + fl4.daddr = amt->remote_ip; + fl4.saddr = amt->local_ip; + fl4.flowi4_tos = AMT_TOS; + fl4.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(amt->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(amt->dev, "no route to %pI4\n", &amt->remote_ip); + return true; + } + + amtmu = skb_push(skb, sizeof(*amtmu)); + amtmu->version = 0; + amtmu->type = AMT_MSG_MEMBERSHIP_UPDATE; + amtmu->reserved = 0; + amtmu->nonce = amt->nonce; + amtmu->response_mac = amt->mac; + + if (!v6) + skb_set_inner_protocol(skb, htons(ETH_P_IP)); + else + skb_set_inner_protocol(skb, htons(ETH_P_IPV6)); + udp_tunnel_xmit_skb(rt, sock->sk, skb, + fl4.saddr, + fl4.daddr, + AMT_TOS, + ip4_dst_hoplimit(&rt->dst), + 0, + amt->gw_port, + amt->relay_port, + false, + false); + amt_update_gw_status(amt, AMT_STATUS_SENT_UPDATE, true); + return false; +} + +static void amt_send_multicast_data(struct amt_dev *amt, + const struct sk_buff *oskb, + struct amt_tunnel_list *tunnel, + bool v6) +{ + struct amt_header_mcast_data *amtmd; + struct socket *sock; + struct sk_buff *skb; + struct iphdr *iph; + struct flowi4 fl4; + struct rtable *rt; + + sock = rcu_dereference_bh(amt->sock); + if (!sock) + return; + + skb = skb_copy_expand(oskb, sizeof(*amtmd) + sizeof(*iph) + + sizeof(struct udphdr), 0, GFP_ATOMIC); + if (!skb) + return; + + skb_reset_inner_headers(skb); + memset(&fl4, 0, sizeof(struct flowi4)); + fl4.flowi4_oif = amt->stream_dev->ifindex; + fl4.daddr = tunnel->ip4; + fl4.saddr = amt->local_ip; + fl4.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(amt->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(amt->dev, "no route to %pI4\n", &tunnel->ip4); + kfree_skb(skb); + return; + } + + amtmd = skb_push(skb, sizeof(*amtmd)); + amtmd->version = 0; + amtmd->reserved = 0; + amtmd->type = AMT_MSG_MULTICAST_DATA; + + if (!v6) + skb_set_inner_protocol(skb, htons(ETH_P_IP)); + else + skb_set_inner_protocol(skb, htons(ETH_P_IPV6)); + udp_tunnel_xmit_skb(rt, sock->sk, skb, + fl4.saddr, + fl4.daddr, + AMT_TOS, + ip4_dst_hoplimit(&rt->dst), + 0, + amt->relay_port, + tunnel->source_port, + false, + false); +} + +static bool amt_send_membership_query(struct amt_dev *amt, + struct sk_buff *skb, + struct amt_tunnel_list *tunnel, + bool v6) +{ + struct amt_header_membership_query *amtmq; + struct socket *sock; + struct rtable *rt; + struct flowi4 fl4; + int err; + + sock = rcu_dereference_bh(amt->sock); + if (!sock) + return true; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(amt->dev) + sizeof(*amtmq) + + sizeof(struct iphdr) + sizeof(struct udphdr)); + if (err) + return true; + + skb_reset_inner_headers(skb); + memset(&fl4, 0, sizeof(struct flowi4)); + fl4.flowi4_oif = amt->stream_dev->ifindex; + fl4.daddr = tunnel->ip4; + fl4.saddr = amt->local_ip; + fl4.flowi4_tos = AMT_TOS; + fl4.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(amt->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(amt->dev, "no route to %pI4\n", &tunnel->ip4); + return true; + } + + amtmq = skb_push(skb, sizeof(*amtmq)); + amtmq->version = 0; + amtmq->type = AMT_MSG_MEMBERSHIP_QUERY; + amtmq->reserved = 0; + amtmq->l = 0; + amtmq->g = 0; + amtmq->nonce = tunnel->nonce; + amtmq->response_mac = tunnel->mac; + + if (!v6) + skb_set_inner_protocol(skb, htons(ETH_P_IP)); + else + skb_set_inner_protocol(skb, htons(ETH_P_IPV6)); + udp_tunnel_xmit_skb(rt, sock->sk, skb, + fl4.saddr, + fl4.daddr, + AMT_TOS, + ip4_dst_hoplimit(&rt->dst), + 0, + amt->relay_port, + tunnel->source_port, + false, + false); + amt_update_relay_status(tunnel, AMT_STATUS_SENT_QUERY, true); + return false; +} + +static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + struct amt_tunnel_list *tunnel; + struct amt_group_node *gnode; + union amt_addr group = {0,}; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h; + struct mld_msg *mld; +#endif + bool report = false; + struct igmphdr *ih; + bool query = false; + struct iphdr *iph; + bool data = false; + bool v6 = false; + u32 hash; + + iph = ip_hdr(skb); + if (iph->version == 4) { + if (!ipv4_is_multicast(iph->daddr)) + goto free; + + if (!ip_mc_check_igmp(skb)) { + ih = igmp_hdr(skb); + switch (ih->type) { + case IGMPV3_HOST_MEMBERSHIP_REPORT: + case IGMP_HOST_MEMBERSHIP_REPORT: + report = true; + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + query = true; + break; + default: + goto free; + } + } else { + data = true; + } + v6 = false; + group.ip4 = iph->daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (iph->version == 6) { + ip6h = ipv6_hdr(skb); + if (!ipv6_addr_is_multicast(&ip6h->daddr)) + goto free; + + if (!ipv6_mc_check_mld(skb)) { + mld = (struct mld_msg *)skb_transport_header(skb); + switch (mld->mld_type) { + case ICMPV6_MGM_REPORT: + case ICMPV6_MLD2_REPORT: + report = true; + break; + case ICMPV6_MGM_QUERY: + query = true; + break; + default: + goto free; + } + } else { + data = true; + } + v6 = true; + group.ip6 = ip6h->daddr; +#endif + } else { + dev->stats.tx_errors++; + goto free; + } + + if (!pskb_may_pull(skb, sizeof(struct ethhdr))) + goto free; + + skb_pull(skb, sizeof(struct ethhdr)); + + if (amt->mode == AMT_MODE_GATEWAY) { + /* Gateway only passes IGMP/MLD packets */ + if (!report) + goto free; + if ((!v6 && !READ_ONCE(amt->ready4)) || + (v6 && !READ_ONCE(amt->ready6))) + goto free; + if (amt_send_membership_update(amt, skb, v6)) + goto free; + goto unlock; + } else if (amt->mode == AMT_MODE_RELAY) { + if (query) { + tunnel = amt_skb_cb(skb)->tunnel; + if (!tunnel) { + WARN_ON(1); + goto free; + } + + /* Do not forward unexpected query */ + if (amt_send_membership_query(amt, skb, tunnel, v6)) + goto free; + goto unlock; + } + + if (!data) + goto free; + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list) { + hash = amt_group_hash(tunnel, &group); + hlist_for_each_entry_rcu(gnode, &tunnel->groups[hash], + node) { + if (!v6) { + if (gnode->group_addr.ip4 == iph->daddr) + goto found; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_equal(&gnode->group_addr.ip6, + &ip6h->daddr)) + goto found; +#endif + } + } + continue; +found: + amt_send_multicast_data(amt, skb, tunnel, v6); + } + } + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +free: + dev_kfree_skb(skb); +unlock: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +static int amt_parse_type(struct sk_buff *skb) +{ + struct amt_header *amth; + + if (!pskb_may_pull(skb, sizeof(struct udphdr) + + sizeof(struct amt_header))) + return -1; + + amth = (struct amt_header *)(udp_hdr(skb) + 1); + + if (amth->version != 0) + return -1; + + if (amth->type >= __AMT_MSG_MAX || !amth->type) + return -1; + return amth->type; +} + +static void amt_clear_groups(struct amt_tunnel_list *tunnel) +{ + struct amt_dev *amt = tunnel->amt; + struct amt_group_node *gnode; + struct hlist_node *t; + int i; + + spin_lock_bh(&tunnel->lock); + rcu_read_lock(); + for (i = 0; i < amt->hash_buckets; i++) + hlist_for_each_entry_safe(gnode, t, &tunnel->groups[i], node) + amt_del_group(amt, gnode); + rcu_read_unlock(); + spin_unlock_bh(&tunnel->lock); +} + +static void amt_tunnel_expire(struct work_struct *work) +{ + struct amt_tunnel_list *tunnel = container_of(to_delayed_work(work), + struct amt_tunnel_list, + gc_wq); + struct amt_dev *amt = tunnel->amt; + + spin_lock_bh(&amt->lock); + rcu_read_lock(); + list_del_rcu(&tunnel->list); + amt->nr_tunnels--; + amt_clear_groups(tunnel); + rcu_read_unlock(); + spin_unlock_bh(&amt->lock); + kfree_rcu(tunnel, rcu); +} + +static void amt_cleanup_srcs(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode) +{ + struct amt_source_node *snode; + struct hlist_node *t; + int i; + + /* Delete old sources */ + for (i = 0; i < amt->hash_buckets; i++) { + hlist_for_each_entry_safe(snode, t, &gnode->sources[i], node) { + if (snode->flags == AMT_SOURCE_OLD) + amt_destroy_source(snode); + } + } + + /* switch from new to old */ + for (i = 0; i < amt->hash_buckets; i++) { + hlist_for_each_entry_rcu(snode, &gnode->sources[i], node) { + snode->flags = AMT_SOURCE_OLD; + if (!gnode->v6) + netdev_dbg(snode->gnode->amt->dev, + "Add source as OLD %pI4 from %pI4\n", + &snode->source_addr.ip4, + &gnode->group_addr.ip4); +#if IS_ENABLED(CONFIG_IPV6) + else + netdev_dbg(snode->gnode->amt->dev, + "Add source as OLD %pI6 from %pI6\n", + &snode->source_addr.ip6, + &gnode->group_addr.ip6); +#endif + } + } +} + +static void amt_add_srcs(struct amt_dev *amt, struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, void *grec, + bool v6) +{ + struct igmpv3_grec *igmp_grec; + struct amt_source_node *snode; +#if IS_ENABLED(CONFIG_IPV6) + struct mld2_grec *mld_grec; +#endif + union amt_addr src = {0,}; + u16 nsrcs; + u32 hash; + int i; + + if (!v6) { + igmp_grec = grec; + nsrcs = ntohs(igmp_grec->grec_nsrcs); + } else { +#if IS_ENABLED(CONFIG_IPV6) + mld_grec = grec; + nsrcs = ntohs(mld_grec->grec_nsrcs); +#else + return; +#endif + } + for (i = 0; i < nsrcs; i++) { + if (tunnel->nr_sources >= amt->max_sources) + return; + if (!v6) + src.ip4 = igmp_grec->grec_src[i]; +#if IS_ENABLED(CONFIG_IPV6) + else + memcpy(&src.ip6, &mld_grec->grec_src[i], + sizeof(struct in6_addr)); +#endif + if (amt_lookup_src(tunnel, gnode, AMT_FILTER_ALL, &src)) + continue; + + snode = amt_alloc_snode(gnode, &src); + if (snode) { + hash = amt_source_hash(tunnel, &snode->source_addr); + hlist_add_head_rcu(&snode->node, &gnode->sources[hash]); + tunnel->nr_sources++; + gnode->nr_sources++; + + if (!gnode->v6) + netdev_dbg(snode->gnode->amt->dev, + "Add source as NEW %pI4 from %pI4\n", + &snode->source_addr.ip4, + &gnode->group_addr.ip4); +#if IS_ENABLED(CONFIG_IPV6) + else + netdev_dbg(snode->gnode->amt->dev, + "Add source as NEW %pI6 from %pI6\n", + &snode->source_addr.ip6, + &gnode->group_addr.ip6); +#endif + } + } +} + +/* Router State Report Rec'd New Router State + * ------------ ------------ ---------------- + * EXCLUDE (X,Y) IS_IN (A) EXCLUDE (X+A,Y-A) + * + * -----------+-----------+-----------+ + * | OLD | NEW | + * -----------+-----------+-----------+ + * FWD | X | X+A | + * -----------+-----------+-----------+ + * D_FWD | Y | Y-A | + * -----------+-----------+-----------+ + * NONE | | A | + * -----------+-----------+-----------+ + * + * a) Received sources are NONE/NEW + * b) All NONE will be deleted by amt_cleanup_srcs(). + * c) All OLD will be deleted by amt_cleanup_srcs(). + * d) After delete, NEW source will be switched to OLD. + */ +static void amt_lookup_act_srcs(struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, + enum amt_ops ops, + enum amt_filter filter, + enum amt_act act, + bool v6) +{ + struct amt_dev *amt = tunnel->amt; + struct amt_source_node *snode; + struct igmpv3_grec *igmp_grec; +#if IS_ENABLED(CONFIG_IPV6) + struct mld2_grec *mld_grec; +#endif + union amt_addr src = {0,}; + struct hlist_node *t; + u16 nsrcs; + int i, j; + + if (!v6) { + igmp_grec = grec; + nsrcs = ntohs(igmp_grec->grec_nsrcs); + } else { +#if IS_ENABLED(CONFIG_IPV6) + mld_grec = grec; + nsrcs = ntohs(mld_grec->grec_nsrcs); +#else + return; +#endif + } + + memset(&src, 0, sizeof(union amt_addr)); + switch (ops) { + case AMT_OPS_INT: + /* A*B */ + for (i = 0; i < nsrcs; i++) { + if (!v6) + src.ip4 = igmp_grec->grec_src[i]; +#if IS_ENABLED(CONFIG_IPV6) + else + memcpy(&src.ip6, &mld_grec->grec_src[i], + sizeof(struct in6_addr)); +#endif + snode = amt_lookup_src(tunnel, gnode, filter, &src); + if (!snode) + continue; + amt_act_src(tunnel, gnode, snode, act); + } + break; + case AMT_OPS_UNI: + /* A+B */ + for (i = 0; i < amt->hash_buckets; i++) { + hlist_for_each_entry_safe(snode, t, &gnode->sources[i], + node) { + if (amt_status_filter(snode, filter)) + amt_act_src(tunnel, gnode, snode, act); + } + } + for (i = 0; i < nsrcs; i++) { + if (!v6) + src.ip4 = igmp_grec->grec_src[i]; +#if IS_ENABLED(CONFIG_IPV6) + else + memcpy(&src.ip6, &mld_grec->grec_src[i], + sizeof(struct in6_addr)); +#endif + snode = amt_lookup_src(tunnel, gnode, filter, &src); + if (!snode) + continue; + amt_act_src(tunnel, gnode, snode, act); + } + break; + case AMT_OPS_SUB: + /* A-B */ + for (i = 0; i < amt->hash_buckets; i++) { + hlist_for_each_entry_safe(snode, t, &gnode->sources[i], + node) { + if (!amt_status_filter(snode, filter)) + continue; + for (j = 0; j < nsrcs; j++) { + if (!v6) + src.ip4 = igmp_grec->grec_src[j]; +#if IS_ENABLED(CONFIG_IPV6) + else + memcpy(&src.ip6, + &mld_grec->grec_src[j], + sizeof(struct in6_addr)); +#endif + if (amt_addr_equal(&snode->source_addr, + &src)) + goto out_sub; + } + amt_act_src(tunnel, gnode, snode, act); + continue; +out_sub:; + } + } + break; + case AMT_OPS_SUB_REV: + /* B-A */ + for (i = 0; i < nsrcs; i++) { + if (!v6) + src.ip4 = igmp_grec->grec_src[i]; +#if IS_ENABLED(CONFIG_IPV6) + else + memcpy(&src.ip6, &mld_grec->grec_src[i], + sizeof(struct in6_addr)); +#endif + snode = amt_lookup_src(tunnel, gnode, AMT_FILTER_ALL, + &src); + if (!snode) { + snode = amt_lookup_src(tunnel, gnode, + filter, &src); + if (snode) + amt_act_src(tunnel, gnode, snode, act); + } + } + break; + default: + netdev_dbg(amt->dev, "Invalid type\n"); + return; + } +} + +static void amt_mcast_is_in_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI + */ + /* Update IS_IN (B) as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_NONE_NEW, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* Update INCLUDE (A) as NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* (B)=GMI */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD_NEW, + AMT_ACT_GMI, + v6); + } else { +/* State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) IS_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI + */ + /* Update (A) in (X, Y) as NONE/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_BOTH, + AMT_ACT_STATUS_NONE_NEW, + v6); + /* Update FWD/OLD as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, zero_grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* Update IS_IN (A) as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_NONE_NEW, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* Update EXCLUDE (, Y-A) as D_FWD_NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + } +} + +static void amt_mcast_is_ex_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + * Delete (A-B) + * Group Timer=GMI + */ + /* EXCLUDE(A*B, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE(, B-A) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* (B-A)=0 */ + amt_lookup_act_srcs(tunnel, gnode, zero_grec, AMT_OPS_UNI, + AMT_FILTER_D_FWD_NEW, + AMT_ACT_GMI_ZERO, + v6); + /* Group Timer=GMI */ + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + gnode->filter_mode = MCAST_EXCLUDE; + /* Delete (A-B) will be worked by amt_cleanup_srcs(). */ + } else { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI + * Delete (X-A) + * Delete (Y-A) + * Group Timer=GMI + */ + /* EXCLUDE (A-Y, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, Y*A ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* (A-X-Y)=GMI */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_BOTH_NEW, + AMT_ACT_GMI, + v6); + /* Group Timer=GMI */ + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + /* Delete (X-A), (Y-A) will be worked by amt_cleanup_srcs(). */ + } +} + +static void amt_mcast_to_in_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI + * Send Q(G,A-B) + */ + /* Update TO_IN (B) sources as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_NONE_NEW, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* Update INCLUDE (A) sources as NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* (B)=GMI */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD_NEW, + AMT_ACT_GMI, + v6); + } else { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) TO_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI + * Send Q(G,X-A) + * Send Q(G) + */ + /* Update TO_IN (A) sources as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_NONE_NEW, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* Update EXCLUDE(X,) sources as FWD/NEW */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, Y-A) + * (A) are already switched to FWD_NEW. + * So, D_FWD/OLD -> D_FWD/NEW is okay. + */ + amt_lookup_act_srcs(tunnel, gnode, zero_grec, AMT_OPS_UNI, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* (A)=GMI + * Only FWD_NEW will have (A) sources. + */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD_NEW, + AMT_ACT_GMI, + v6); + } +} + +static void amt_mcast_to_ex_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + * Delete (A-B) + * Send Q(G,A*B) + * Group Timer=GMI + */ + /* EXCLUDE (A*B, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, B-A) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* (B-A)=0 */ + amt_lookup_act_srcs(tunnel, gnode, zero_grec, AMT_OPS_UNI, + AMT_FILTER_D_FWD_NEW, + AMT_ACT_GMI_ZERO, + v6); + /* Group Timer=GMI */ + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + gnode->filter_mode = MCAST_EXCLUDE; + /* Delete (A-B) will be worked by amt_cleanup_srcs(). */ + } else { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer + * Delete (X-A) + * Delete (Y-A) + * Send Q(G,A-Y) + * Group Timer=GMI + */ + /* Update (A-X-Y) as NONE/OLD */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_BOTH, + AMT_ACT_GT, + v6); + /* EXCLUDE (A-Y, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, Y*A) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* Group Timer=GMI */ + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + /* Delete (X-A), (Y-A) will be worked by amt_cleanup_srcs(). */ + } +} + +static void amt_mcast_allow_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI + */ + /* INCLUDE (A+B) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* (B)=GMI */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD_NEW, + AMT_ACT_GMI, + v6); + } else { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI + */ + /* EXCLUDE (X+A, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, Y-A) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + /* (A)=GMI + * All (A) source are now FWD/NEW status. + */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_INT, + AMT_FILTER_FWD_NEW, + AMT_ACT_GMI, + v6); + } +} + +static void amt_mcast_block_handler(struct amt_dev *amt, + struct amt_tunnel_list *tunnel, + struct amt_group_node *gnode, + void *grec, void *zero_grec, bool v6) +{ + if (gnode->filter_mode == MCAST_INCLUDE) { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) + */ + /* INCLUDE (A) */ + amt_lookup_act_srcs(tunnel, gnode, zero_grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + } else { +/* Router State Report Rec'd New Router State Actions + * ------------ ------------ ---------------- ------- + * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer + * Send Q(G,A-Y) + */ + /* (A-X-Y)=Group Timer */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_BOTH, + AMT_ACT_GT, + v6); + /* EXCLUDE (X, ) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (X+(A-Y) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_SUB_REV, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_FWD_NEW, + v6); + /* EXCLUDE (, Y) */ + amt_lookup_act_srcs(tunnel, gnode, grec, AMT_OPS_UNI, + AMT_FILTER_D_FWD, + AMT_ACT_STATUS_D_FWD_NEW, + v6); + } +} + +/* RFC 3376 + * 7.3.2. In the Presence of Older Version Group Members + * + * When Group Compatibility Mode is IGMPv2, a router internally + * translates the following IGMPv2 messages for that group to their + * IGMPv3 equivalents: + * + * IGMPv2 Message IGMPv3 Equivalent + * -------------- ----------------- + * Report IS_EX( {} ) + * Leave TO_IN( {} ) + */ +static void amt_igmpv2_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct igmphdr *ih = igmp_hdr(skb); + struct iphdr *iph = ip_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + + memset(&group, 0, sizeof(union amt_addr)); + group.ip4 = ih->group; + memset(&host, 0, sizeof(union amt_addr)); + host.ip4 = iph->saddr; + + gnode = amt_lookup_group(tunnel, &group, &host, false); + if (!gnode) { + gnode = amt_add_group(amt, tunnel, &group, &host, false); + if (!IS_ERR(gnode)) { + gnode->filter_mode = MCAST_EXCLUDE; + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + } + } +} + +/* RFC 3376 + * 7.3.2. In the Presence of Older Version Group Members + * + * When Group Compatibility Mode is IGMPv2, a router internally + * translates the following IGMPv2 messages for that group to their + * IGMPv3 equivalents: + * + * IGMPv2 Message IGMPv3 Equivalent + * -------------- ----------------- + * Report IS_EX( {} ) + * Leave TO_IN( {} ) + */ +static void amt_igmpv2_leave_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct igmphdr *ih = igmp_hdr(skb); + struct iphdr *iph = ip_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + + memset(&group, 0, sizeof(union amt_addr)); + group.ip4 = ih->group; + memset(&host, 0, sizeof(union amt_addr)); + host.ip4 = iph->saddr; + + gnode = amt_lookup_group(tunnel, &group, &host, false); + if (gnode) + amt_del_group(amt, gnode); +} + +static void amt_igmpv3_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct igmpv3_report *ihrv3 = igmpv3_report_hdr(skb); + int len = skb_transport_offset(skb) + sizeof(*ihrv3); + void *zero_grec = (void *)&igmpv3_zero_grec; + struct iphdr *iph = ip_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + struct igmpv3_grec *grec; + u16 nsrcs; + int i; + + for (i = 0; i < ntohs(ihrv3->ngrec); i++) { + len += sizeof(*grec); + if (!ip_mc_may_pull(skb, len)) + break; + + grec = (void *)(skb->data + len - sizeof(*grec)); + nsrcs = ntohs(grec->grec_nsrcs); + + len += nsrcs * sizeof(__be32); + if (!ip_mc_may_pull(skb, len)) + break; + + memset(&group, 0, sizeof(union amt_addr)); + group.ip4 = grec->grec_mca; + memset(&host, 0, sizeof(union amt_addr)); + host.ip4 = iph->saddr; + gnode = amt_lookup_group(tunnel, &group, &host, false); + if (!gnode) { + gnode = amt_add_group(amt, tunnel, &group, &host, + false); + if (IS_ERR(gnode)) + continue; + } + + amt_add_srcs(amt, tunnel, gnode, grec, false); + switch (grec->grec_type) { + case IGMPV3_MODE_IS_INCLUDE: + amt_mcast_is_in_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + case IGMPV3_MODE_IS_EXCLUDE: + amt_mcast_is_ex_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + case IGMPV3_CHANGE_TO_INCLUDE: + amt_mcast_to_in_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + amt_mcast_to_ex_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + case IGMPV3_ALLOW_NEW_SOURCES: + amt_mcast_allow_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + amt_mcast_block_handler(amt, tunnel, gnode, grec, + zero_grec, false); + break; + default: + break; + } + amt_cleanup_srcs(amt, tunnel, gnode); + } +} + +/* caller held tunnel->lock */ +static void amt_igmp_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct igmphdr *ih = igmp_hdr(skb); + + switch (ih->type) { + case IGMPV3_HOST_MEMBERSHIP_REPORT: + amt_igmpv3_report_handler(amt, skb, tunnel); + break; + case IGMPV2_HOST_MEMBERSHIP_REPORT: + amt_igmpv2_report_handler(amt, skb, tunnel); + break; + case IGMP_HOST_LEAVE_MESSAGE: + amt_igmpv2_leave_handler(amt, skb, tunnel); + break; + default: + break; + } +} + +#if IS_ENABLED(CONFIG_IPV6) +/* RFC 3810 + * 8.3.2. In the Presence of MLDv1 Multicast Address Listeners + * + * When Multicast Address Compatibility Mode is MLDv2, a router acts + * using the MLDv2 protocol for that multicast address. When Multicast + * Address Compatibility Mode is MLDv1, a router internally translates + * the following MLDv1 messages for that multicast address to their + * MLDv2 equivalents: + * + * MLDv1 Message MLDv2 Equivalent + * -------------- ----------------- + * Report IS_EX( {} ) + * Done TO_IN( {} ) + */ +static void amt_mldv1_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct mld_msg *mld = (struct mld_msg *)icmp6_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + + memcpy(&group.ip6, &mld->mld_mca, sizeof(struct in6_addr)); + memcpy(&host.ip6, &ip6h->saddr, sizeof(struct in6_addr)); + + gnode = amt_lookup_group(tunnel, &group, &host, true); + if (!gnode) { + gnode = amt_add_group(amt, tunnel, &group, &host, true); + if (!IS_ERR(gnode)) { + gnode->filter_mode = MCAST_EXCLUDE; + if (!mod_delayed_work(amt_wq, &gnode->group_timer, + msecs_to_jiffies(amt_gmi(amt)))) + dev_hold(amt->dev); + } + } +} + +/* RFC 3810 + * 8.3.2. In the Presence of MLDv1 Multicast Address Listeners + * + * When Multicast Address Compatibility Mode is MLDv2, a router acts + * using the MLDv2 protocol for that multicast address. When Multicast + * Address Compatibility Mode is MLDv1, a router internally translates + * the following MLDv1 messages for that multicast address to their + * MLDv2 equivalents: + * + * MLDv1 Message MLDv2 Equivalent + * -------------- ----------------- + * Report IS_EX( {} ) + * Done TO_IN( {} ) + */ +static void amt_mldv1_leave_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct mld_msg *mld = (struct mld_msg *)icmp6_hdr(skb); + struct iphdr *iph = ip_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + + memcpy(&group.ip6, &mld->mld_mca, sizeof(struct in6_addr)); + memset(&host, 0, sizeof(union amt_addr)); + host.ip4 = iph->saddr; + + gnode = amt_lookup_group(tunnel, &group, &host, true); + if (gnode) { + amt_del_group(amt, gnode); + return; + } +} + +static void amt_mldv2_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct mld2_report *mld2r = (struct mld2_report *)icmp6_hdr(skb); + int len = skb_transport_offset(skb) + sizeof(*mld2r); + void *zero_grec = (void *)&mldv2_zero_grec; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct amt_group_node *gnode; + union amt_addr group, host; + struct mld2_grec *grec; + u16 nsrcs; + int i; + + for (i = 0; i < ntohs(mld2r->mld2r_ngrec); i++) { + len += sizeof(*grec); + if (!ipv6_mc_may_pull(skb, len)) + break; + + grec = (void *)(skb->data + len - sizeof(*grec)); + nsrcs = ntohs(grec->grec_nsrcs); + + len += nsrcs * sizeof(struct in6_addr); + if (!ipv6_mc_may_pull(skb, len)) + break; + + memset(&group, 0, sizeof(union amt_addr)); + group.ip6 = grec->grec_mca; + memset(&host, 0, sizeof(union amt_addr)); + host.ip6 = ip6h->saddr; + gnode = amt_lookup_group(tunnel, &group, &host, true); + if (!gnode) { + gnode = amt_add_group(amt, tunnel, &group, &host, + ETH_P_IPV6); + if (IS_ERR(gnode)) + continue; + } + + amt_add_srcs(amt, tunnel, gnode, grec, true); + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + amt_mcast_is_in_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + case MLD2_MODE_IS_EXCLUDE: + amt_mcast_is_ex_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + case MLD2_CHANGE_TO_INCLUDE: + amt_mcast_to_in_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + case MLD2_CHANGE_TO_EXCLUDE: + amt_mcast_to_ex_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + case MLD2_ALLOW_NEW_SOURCES: + amt_mcast_allow_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + case MLD2_BLOCK_OLD_SOURCES: + amt_mcast_block_handler(amt, tunnel, gnode, grec, + zero_grec, true); + break; + default: + break; + } + amt_cleanup_srcs(amt, tunnel, gnode); + } +} + +/* caller held tunnel->lock */ +static void amt_mld_report_handler(struct amt_dev *amt, struct sk_buff *skb, + struct amt_tunnel_list *tunnel) +{ + struct mld_msg *mld = (struct mld_msg *)icmp6_hdr(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REPORT: + amt_mldv1_report_handler(amt, skb, tunnel); + break; + case ICMPV6_MLD2_REPORT: + amt_mldv2_report_handler(amt, skb, tunnel); + break; + case ICMPV6_MGM_REDUCTION: + amt_mldv1_leave_handler(amt, skb, tunnel); + break; + default: + break; + } +} +#endif + +static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb) +{ + struct amt_header_advertisement *amta; + int hdr_size; + + hdr_size = sizeof(*amta) + sizeof(struct udphdr); + if (!pskb_may_pull(skb, hdr_size)) + return true; + + amta = (struct amt_header_advertisement *)(udp_hdr(skb) + 1); + if (!amta->ip4) + return true; + + if (amta->reserved || amta->version) + return true; + + if (ipv4_is_loopback(amta->ip4) || ipv4_is_multicast(amta->ip4) || + ipv4_is_zeronet(amta->ip4)) + return true; + + if (amt->status != AMT_STATUS_SENT_DISCOVERY || + amt->nonce != amta->nonce) + return true; + + amt->remote_ip = amta->ip4; + netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip); + mod_delayed_work(amt_wq, &amt->req_wq, 0); + + amt_update_gw_status(amt, AMT_STATUS_RECEIVED_ADVERTISEMENT, true); + return false; +} + +static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb) +{ + struct amt_header_mcast_data *amtmd; + int hdr_size, len, err; + struct ethhdr *eth; + struct iphdr *iph; + + if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE) + return true; + + hdr_size = sizeof(*amtmd) + sizeof(struct udphdr); + if (!pskb_may_pull(skb, hdr_size)) + return true; + + amtmd = (struct amt_header_mcast_data *)(udp_hdr(skb) + 1); + if (amtmd->reserved || amtmd->version) + return true; + + if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_IP), false)) + return true; + + skb_reset_network_header(skb); + skb_push(skb, sizeof(*eth)); + skb_reset_mac_header(skb); + skb_pull(skb, sizeof(*eth)); + eth = eth_hdr(skb); + + if (!pskb_may_pull(skb, sizeof(*iph))) + return true; + iph = ip_hdr(skb); + + if (iph->version == 4) { + if (!ipv4_is_multicast(iph->daddr)) + return true; + skb->protocol = htons(ETH_P_IP); + eth->h_proto = htons(ETH_P_IP); + ip_eth_mc_map(iph->daddr, eth->h_dest); +#if IS_ENABLED(CONFIG_IPV6) + } else if (iph->version == 6) { + struct ipv6hdr *ip6h; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return true; + + ip6h = ipv6_hdr(skb); + if (!ipv6_addr_is_multicast(&ip6h->daddr)) + return true; + skb->protocol = htons(ETH_P_IPV6); + eth->h_proto = htons(ETH_P_IPV6); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); +#endif + } else { + return true; + } + + skb->pkt_type = PACKET_MULTICAST; + skb->ip_summed = CHECKSUM_NONE; + len = skb->len; + err = gro_cells_receive(&amt->gro_cells, skb); + if (likely(err == NET_RX_SUCCESS)) + dev_sw_netstats_rx_add(amt->dev, len); + else + amt->dev->stats.rx_dropped++; + + return false; +} + +static bool amt_membership_query_handler(struct amt_dev *amt, + struct sk_buff *skb) +{ + struct amt_header_membership_query *amtmq; + struct igmpv3_query *ihv3; + struct ethhdr *eth, *oeth; + struct iphdr *iph; + int hdr_size, len; + + hdr_size = sizeof(*amtmq) + sizeof(struct udphdr); + if (!pskb_may_pull(skb, hdr_size)) + return true; + + amtmq = (struct amt_header_membership_query *)(udp_hdr(skb) + 1); + if (amtmq->reserved || amtmq->version) + return true; + + if (amtmq->nonce != amt->nonce) + return true; + + hdr_size -= sizeof(*eth); + if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false)) + return true; + + oeth = eth_hdr(skb); + skb_reset_mac_header(skb); + skb_pull(skb, sizeof(*eth)); + skb_reset_network_header(skb); + eth = eth_hdr(skb); + if (!pskb_may_pull(skb, sizeof(*iph))) + return true; + + iph = ip_hdr(skb); + if (iph->version == 4) { + if (READ_ONCE(amt->ready4)) + return true; + + if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS + + sizeof(*ihv3))) + return true; + + if (!ipv4_is_multicast(iph->daddr)) + return true; + + ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS); + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS); + WRITE_ONCE(amt->ready4, true); + amt->mac = amtmq->response_mac; + amt->req_cnt = 0; + amt->qi = ihv3->qqic; + skb->protocol = htons(ETH_P_IP); + eth->h_proto = htons(ETH_P_IP); + ip_eth_mc_map(iph->daddr, eth->h_dest); +#if IS_ENABLED(CONFIG_IPV6) + } else if (iph->version == 6) { + struct mld2_query *mld2q; + struct ipv6hdr *ip6h; + + if (READ_ONCE(amt->ready6)) + return true; + + if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS + + sizeof(*mld2q))) + return true; + + ip6h = ipv6_hdr(skb); + if (!ipv6_addr_is_multicast(&ip6h->daddr)) + return true; + + mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); + WRITE_ONCE(amt->ready6, true); + amt->mac = amtmq->response_mac; + amt->req_cnt = 0; + amt->qi = mld2q->mld2q_qqic; + skb->protocol = htons(ETH_P_IPV6); + eth->h_proto = htons(ETH_P_IPV6); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); +#endif + } else { + return true; + } + + ether_addr_copy(eth->h_source, oeth->h_source); + skb->pkt_type = PACKET_MULTICAST; + skb->ip_summed = CHECKSUM_NONE; + len = skb->len; + local_bh_disable(); + if (__netif_rx(skb) == NET_RX_SUCCESS) { + amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true); + dev_sw_netstats_rx_add(amt->dev, len); + } else { + amt->dev->stats.rx_dropped++; + } + local_bh_enable(); + + return false; +} + +static bool amt_update_handler(struct amt_dev *amt, struct sk_buff *skb) +{ + struct amt_header_membership_update *amtmu; + struct amt_tunnel_list *tunnel; + struct ethhdr *eth; + struct iphdr *iph; + int len, hdr_size; + + iph = ip_hdr(skb); + + hdr_size = sizeof(*amtmu) + sizeof(struct udphdr); + if (!pskb_may_pull(skb, hdr_size)) + return true; + + amtmu = (struct amt_header_membership_update *)(udp_hdr(skb) + 1); + if (amtmu->reserved || amtmu->version) + return true; + + if (iptunnel_pull_header(skb, hdr_size, skb->protocol, false)) + return true; + + skb_reset_network_header(skb); + + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list) { + if (tunnel->ip4 == iph->saddr) { + if ((amtmu->nonce == tunnel->nonce && + amtmu->response_mac == tunnel->mac)) { + mod_delayed_work(amt_wq, &tunnel->gc_wq, + msecs_to_jiffies(amt_gmi(amt)) + * 3); + goto report; + } else { + netdev_dbg(amt->dev, "Invalid MAC\n"); + return true; + } + } + } + + return true; + +report: + if (!pskb_may_pull(skb, sizeof(*iph))) + return true; + + iph = ip_hdr(skb); + if (iph->version == 4) { + if (ip_mc_check_igmp(skb)) { + netdev_dbg(amt->dev, "Invalid IGMP\n"); + return true; + } + + spin_lock_bh(&tunnel->lock); + amt_igmp_report_handler(amt, skb, tunnel); + spin_unlock_bh(&tunnel->lock); + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = htons(ETH_P_IP); + eth->h_proto = htons(ETH_P_IP); + ip_eth_mc_map(iph->daddr, eth->h_dest); +#if IS_ENABLED(CONFIG_IPV6) + } else if (iph->version == 6) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + if (ipv6_mc_check_mld(skb)) { + netdev_dbg(amt->dev, "Invalid MLD\n"); + return true; + } + + spin_lock_bh(&tunnel->lock); + amt_mld_report_handler(amt, skb, tunnel); + spin_unlock_bh(&tunnel->lock); + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = htons(ETH_P_IPV6); + eth->h_proto = htons(ETH_P_IPV6); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); +#endif + } else { + netdev_dbg(amt->dev, "Unsupported Protocol\n"); + return true; + } + + skb_pull(skb, sizeof(struct ethhdr)); + skb->pkt_type = PACKET_MULTICAST; + skb->ip_summed = CHECKSUM_NONE; + len = skb->len; + if (__netif_rx(skb) == NET_RX_SUCCESS) { + amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_UPDATE, + true); + dev_sw_netstats_rx_add(amt->dev, len); + } else { + amt->dev->stats.rx_dropped++; + } + + return false; +} + +static void amt_send_advertisement(struct amt_dev *amt, __be32 nonce, + __be32 daddr, __be16 dport) +{ + struct amt_header_advertisement *amta; + int hlen, tlen, offset; + struct socket *sock; + struct udphdr *udph; + struct sk_buff *skb; + struct iphdr *iph; + struct rtable *rt; + struct flowi4 fl4; + u32 len; + int err; + + rcu_read_lock(); + sock = rcu_dereference(amt->sock); + if (!sock) + goto out; + + if (!netif_running(amt->stream_dev) || !netif_running(amt->dev)) + goto out; + + rt = ip_route_output_ports(amt->net, &fl4, sock->sk, + daddr, amt->local_ip, + dport, amt->relay_port, + IPPROTO_UDP, 0, + amt->stream_dev->ifindex); + if (IS_ERR(rt)) { + amt->dev->stats.tx_errors++; + goto out; + } + + hlen = LL_RESERVED_SPACE(amt->dev); + tlen = amt->dev->needed_tailroom; + len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amta); + skb = netdev_alloc_skb_ip_align(amt->dev, len); + if (!skb) { + ip_rt_put(rt); + amt->dev->stats.tx_errors++; + goto out; + } + + skb->priority = TC_PRIO_CONTROL; + skb_dst_set(skb, &rt->dst); + + len = sizeof(*iph) + sizeof(*udph) + sizeof(*amta); + skb_reset_network_header(skb); + skb_put(skb, len); + amta = skb_pull(skb, sizeof(*iph) + sizeof(*udph)); + amta->version = 0; + amta->type = AMT_MSG_ADVERTISEMENT; + amta->reserved = 0; + amta->nonce = nonce; + amta->ip4 = amt->local_ip; + skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + udph->source = amt->relay_port; + udph->dest = dport; + udph->len = htons(sizeof(*amta) + sizeof(*udph)); + udph->check = 0; + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + udph->check = csum_tcpudp_magic(amt->local_ip, daddr, + sizeof(*udph) + sizeof(*amta), + IPPROTO_UDP, skb->csum); + + skb_push(skb, sizeof(*iph)); + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = (sizeof(struct iphdr)) >> 2; + iph->tos = AMT_TOS; + iph->frag_off = 0; + iph->ttl = ip4_dst_hoplimit(&rt->dst); + iph->daddr = daddr; + iph->saddr = amt->local_ip; + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(len); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(amt->net, skb, NULL); + ip_send_check(iph); + err = ip_local_out(amt->net, sock->sk, skb); + if (unlikely(net_xmit_eval(err))) + amt->dev->stats.tx_errors++; + +out: + rcu_read_unlock(); +} + +static bool amt_discovery_handler(struct amt_dev *amt, struct sk_buff *skb) +{ + struct amt_header_discovery *amtd; + struct udphdr *udph; + struct iphdr *iph; + + if (!pskb_may_pull(skb, sizeof(*udph) + sizeof(*amtd))) + return true; + + iph = ip_hdr(skb); + udph = udp_hdr(skb); + amtd = (struct amt_header_discovery *)(udp_hdr(skb) + 1); + + if (amtd->reserved || amtd->version) + return true; + + amt_send_advertisement(amt, amtd->nonce, iph->saddr, udph->source); + + return false; +} + +static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) +{ + struct amt_header_request *amtrh; + struct amt_tunnel_list *tunnel; + unsigned long long key; + struct udphdr *udph; + struct iphdr *iph; + u64 mac; + int i; + + if (!pskb_may_pull(skb, sizeof(*udph) + sizeof(*amtrh))) + return true; + + iph = ip_hdr(skb); + udph = udp_hdr(skb); + amtrh = (struct amt_header_request *)(udp_hdr(skb) + 1); + + if (amtrh->reserved1 || amtrh->reserved2 || amtrh->version) + return true; + + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list) + if (tunnel->ip4 == iph->saddr) + goto send; + + spin_lock_bh(&amt->lock); + if (amt->nr_tunnels >= amt->max_tunnels) { + spin_unlock_bh(&amt->lock); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + return true; + } + + tunnel = kzalloc(sizeof(*tunnel) + + (sizeof(struct hlist_head) * amt->hash_buckets), + GFP_ATOMIC); + if (!tunnel) { + spin_unlock_bh(&amt->lock); + return true; + } + + tunnel->source_port = udph->source; + tunnel->ip4 = iph->saddr; + + memcpy(&key, &tunnel->key, sizeof(unsigned long long)); + tunnel->amt = amt; + spin_lock_init(&tunnel->lock); + for (i = 0; i < amt->hash_buckets; i++) + INIT_HLIST_HEAD(&tunnel->groups[i]); + + INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire); + + list_add_tail_rcu(&tunnel->list, &amt->tunnel_list); + tunnel->key = amt->key; + __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true); + amt->nr_tunnels++; + mod_delayed_work(amt_wq, &tunnel->gc_wq, + msecs_to_jiffies(amt_gmi(amt))); + spin_unlock_bh(&amt->lock); + +send: + tunnel->nonce = amtrh->nonce; + mac = siphash_3u32((__force u32)tunnel->ip4, + (__force u32)tunnel->source_port, + (__force u32)tunnel->nonce, + &tunnel->key); + tunnel->mac = mac >> 16; + + if (!netif_running(amt->dev) || !netif_running(amt->stream_dev)) + return true; + + if (!amtrh->p) + amt_send_igmp_gq(amt, tunnel); + else + amt_send_mld_gq(amt, tunnel); + + return false; +} + +static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb) +{ + int type = amt_parse_type(skb); + int err = 1; + + if (type == -1) + goto drop; + + if (amt->mode == AMT_MODE_GATEWAY) { + switch (type) { + case AMT_MSG_ADVERTISEMENT: + err = amt_advertisement_handler(amt, skb); + break; + case AMT_MSG_MEMBERSHIP_QUERY: + err = amt_membership_query_handler(amt, skb); + if (!err) + return; + break; + default: + netdev_dbg(amt->dev, "Invalid type of Gateway\n"); + break; + } + } +drop: + if (err) { + amt->dev->stats.rx_dropped++; + kfree_skb(skb); + } else { + consume_skb(skb); + } +} + +static int amt_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct amt_dev *amt; + struct iphdr *iph; + int type; + bool err; + + rcu_read_lock_bh(); + amt = rcu_dereference_sk_user_data(sk); + if (!amt) { + err = true; + kfree_skb(skb); + goto out; + } + + skb->dev = amt->dev; + iph = ip_hdr(skb); + type = amt_parse_type(skb); + if (type == -1) { + err = true; + goto drop; + } + + if (amt->mode == AMT_MODE_GATEWAY) { + switch (type) { + case AMT_MSG_ADVERTISEMENT: + if (iph->saddr != amt->discovery_ip) { + netdev_dbg(amt->dev, "Invalid Relay IP\n"); + err = true; + goto drop; + } + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; + goto drop; + } + goto out; + case AMT_MSG_MULTICAST_DATA: + if (iph->saddr != amt->remote_ip) { + netdev_dbg(amt->dev, "Invalid Relay IP\n"); + err = true; + goto drop; + } + err = amt_multicast_data_handler(amt, skb); + if (err) + goto drop; + else + goto out; + case AMT_MSG_MEMBERSHIP_QUERY: + if (iph->saddr != amt->remote_ip) { + netdev_dbg(amt->dev, "Invalid Relay IP\n"); + err = true; + goto drop; + } + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; + goto drop; + } + goto out; + default: + err = true; + netdev_dbg(amt->dev, "Invalid type of Gateway\n"); + break; + } + } else { + switch (type) { + case AMT_MSG_DISCOVERY: + err = amt_discovery_handler(amt, skb); + break; + case AMT_MSG_REQUEST: + err = amt_request_handler(amt, skb); + break; + case AMT_MSG_MEMBERSHIP_UPDATE: + err = amt_update_handler(amt, skb); + if (err) + goto drop; + else + goto out; + default: + err = true; + netdev_dbg(amt->dev, "Invalid type of relay\n"); + break; + } + } +drop: + if (err) { + amt->dev->stats.rx_dropped++; + kfree_skb(skb); + } else { + consume_skb(skb); + } +out: + rcu_read_unlock_bh(); + return 0; +} + +static void amt_event_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(work, struct amt_dev, event_wq); + struct sk_buff *skb; + u8 event; + int i; + + for (i = 0; i < AMT_MAX_EVENTS; i++) { + spin_lock_bh(&amt->lock); + if (amt->nr_events == 0) { + spin_unlock_bh(&amt->lock); + return; + } + event = amt->events[amt->event_idx].event; + skb = amt->events[amt->event_idx].skb; + amt->events[amt->event_idx].event = AMT_EVENT_NONE; + amt->events[amt->event_idx].skb = NULL; + amt->nr_events--; + amt->event_idx++; + amt->event_idx %= AMT_MAX_EVENTS; + spin_unlock_bh(&amt->lock); + + switch (event) { + case AMT_EVENT_RECEIVE: + amt_gw_rcv(amt, skb); + break; + case AMT_EVENT_SEND_DISCOVERY: + amt_event_send_discovery(amt); + break; + case AMT_EVENT_SEND_REQUEST: + amt_event_send_request(amt); + break; + default: + kfree_skb(skb); + break; + } + } +} + +static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) +{ + struct amt_dev *amt; + int type; + + rcu_read_lock_bh(); + amt = rcu_dereference_sk_user_data(sk); + if (!amt) + goto out; + + if (amt->mode != AMT_MODE_GATEWAY) + goto drop; + + type = amt_parse_type(skb); + if (type == -1) + goto drop; + + netdev_dbg(amt->dev, "Received IGMP Unreachable of %s\n", + type_str[type]); + switch (type) { + case AMT_MSG_DISCOVERY: + break; + case AMT_MSG_REQUEST: + case AMT_MSG_MEMBERSHIP_UPDATE: + if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT) + mod_delayed_work(amt_wq, &amt->req_wq, 0); + break; + default: + goto drop; + } +out: + rcu_read_unlock_bh(); + return 0; +drop: + rcu_read_unlock_bh(); + amt->dev->stats.rx_dropped++; + return 0; +} + +static struct socket *amt_create_sock(struct net *net, __be16 port) +{ + struct udp_port_cfg udp_conf; + struct socket *sock; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + + udp_conf.local_udp_port = port; + + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + return ERR_PTR(err); + + return sock; +} + +static int amt_socket_create(struct amt_dev *amt) +{ + struct udp_tunnel_sock_cfg tunnel_cfg; + struct socket *sock; + + sock = amt_create_sock(amt->net, amt->relay_port); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + /* Mark socket as an encapsulation socket */ + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.sk_user_data = amt; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = amt_rcv; + tunnel_cfg.encap_err_lookup = amt_err_lookup; + tunnel_cfg.encap_destroy = NULL; + setup_udp_tunnel_sock(amt->net, sock, &tunnel_cfg); + + rcu_assign_pointer(amt->sock, sock); + return 0; +} + +static int amt_dev_open(struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + int err; + + amt->ready4 = false; + amt->ready6 = false; + amt->event_idx = 0; + amt->nr_events = 0; + + err = amt_socket_create(amt); + if (err) + return err; + + amt->req_cnt = 0; + amt->remote_ip = 0; + amt->nonce = 0; + get_random_bytes(&amt->key, sizeof(siphash_key_t)); + + amt->status = AMT_STATUS_INIT; + if (amt->mode == AMT_MODE_GATEWAY) { + mod_delayed_work(amt_wq, &amt->discovery_wq, 0); + mod_delayed_work(amt_wq, &amt->req_wq, 0); + } else if (amt->mode == AMT_MODE_RELAY) { + mod_delayed_work(amt_wq, &amt->secret_wq, + msecs_to_jiffies(AMT_SECRET_TIMEOUT)); + } + return err; +} + +static int amt_dev_stop(struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + struct amt_tunnel_list *tunnel, *tmp; + struct socket *sock; + struct sk_buff *skb; + int i; + + cancel_delayed_work_sync(&amt->req_wq); + cancel_delayed_work_sync(&amt->discovery_wq); + cancel_delayed_work_sync(&amt->secret_wq); + + /* shutdown */ + sock = rtnl_dereference(amt->sock); + RCU_INIT_POINTER(amt->sock, NULL); + synchronize_net(); + if (sock) + udp_tunnel_sock_release(sock); + + cancel_work_sync(&amt->event_wq); + for (i = 0; i < AMT_MAX_EVENTS; i++) { + skb = amt->events[i].skb; + kfree_skb(skb); + amt->events[i].event = AMT_EVENT_NONE; + amt->events[i].skb = NULL; + } + + amt->ready4 = false; + amt->ready6 = false; + amt->req_cnt = 0; + amt->remote_ip = 0; + + list_for_each_entry_safe(tunnel, tmp, &amt->tunnel_list, list) { + list_del_rcu(&tunnel->list); + amt->nr_tunnels--; + cancel_delayed_work_sync(&tunnel->gc_wq); + amt_clear_groups(tunnel); + kfree_rcu(tunnel, rcu); + } + + return 0; +} + +static const struct device_type amt_type = { + .name = "amt", +}; + +static int amt_dev_init(struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + int err; + + amt->dev = dev; + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&amt->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + return 0; +} + +static void amt_dev_uninit(struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + + gro_cells_destroy(&amt->gro_cells); + free_percpu(dev->tstats); +} + +static const struct net_device_ops amt_netdev_ops = { + .ndo_init = amt_dev_init, + .ndo_uninit = amt_dev_uninit, + .ndo_open = amt_dev_open, + .ndo_stop = amt_dev_stop, + .ndo_start_xmit = amt_dev_xmit, + .ndo_get_stats64 = dev_get_tstats64, +}; + +static void amt_link_setup(struct net_device *dev) +{ + dev->netdev_ops = &amt_netdev_ops; + dev->needs_free_netdev = true; + SET_NETDEV_DEVTYPE(dev, &amt_type); + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->features |= NETIF_F_NETNS_LOCAL; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->hw_features |= NETIF_F_FRAGLIST | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + eth_hw_addr_random(dev); + eth_zero_addr(dev->broadcast); + ether_setup(dev); +} + +static const struct nla_policy amt_policy[IFLA_AMT_MAX + 1] = { + [IFLA_AMT_MODE] = { .type = NLA_U32 }, + [IFLA_AMT_RELAY_PORT] = { .type = NLA_U16 }, + [IFLA_AMT_GATEWAY_PORT] = { .type = NLA_U16 }, + [IFLA_AMT_LINK] = { .type = NLA_U32 }, + [IFLA_AMT_LOCAL_IP] = { .len = sizeof_field(struct iphdr, daddr) }, + [IFLA_AMT_REMOTE_IP] = { .len = sizeof_field(struct iphdr, daddr) }, + [IFLA_AMT_DISCOVERY_IP] = { .len = sizeof_field(struct iphdr, daddr) }, + [IFLA_AMT_MAX_TUNNELS] = { .type = NLA_U32 }, +}; + +static int amt_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (!data) + return -EINVAL; + + if (!data[IFLA_AMT_LINK]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_AMT_LINK], + "Link attribute is required"); + return -EINVAL; + } + + if (!data[IFLA_AMT_MODE]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_AMT_MODE], + "Mode attribute is required"); + return -EINVAL; + } + + if (nla_get_u32(data[IFLA_AMT_MODE]) > AMT_MODE_MAX) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_AMT_MODE], + "Mode attribute is not valid"); + return -EINVAL; + } + + if (!data[IFLA_AMT_LOCAL_IP]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_AMT_DISCOVERY_IP], + "Local attribute is required"); + return -EINVAL; + } + + if (!data[IFLA_AMT_DISCOVERY_IP] && + nla_get_u32(data[IFLA_AMT_MODE]) == AMT_MODE_GATEWAY) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_AMT_LOCAL_IP], + "Discovery attribute is required"); + return -EINVAL; + } + + return 0; +} + +static int amt_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct amt_dev *amt = netdev_priv(dev); + int err = -EINVAL; + + amt->net = net; + amt->mode = nla_get_u32(data[IFLA_AMT_MODE]); + + if (data[IFLA_AMT_MAX_TUNNELS] && + nla_get_u32(data[IFLA_AMT_MAX_TUNNELS])) + amt->max_tunnels = nla_get_u32(data[IFLA_AMT_MAX_TUNNELS]); + else + amt->max_tunnels = AMT_MAX_TUNNELS; + + spin_lock_init(&amt->lock); + amt->max_groups = AMT_MAX_GROUP; + amt->max_sources = AMT_MAX_SOURCE; + amt->hash_buckets = AMT_HSIZE; + amt->nr_tunnels = 0; + get_random_bytes(&amt->hash_seed, sizeof(amt->hash_seed)); + amt->stream_dev = dev_get_by_index(net, + nla_get_u32(data[IFLA_AMT_LINK])); + if (!amt->stream_dev) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_LINK], + "Can't find stream device"); + return -ENODEV; + } + + if (amt->stream_dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_LINK], + "Invalid stream device type"); + goto err; + } + + amt->local_ip = nla_get_in_addr(data[IFLA_AMT_LOCAL_IP]); + if (ipv4_is_loopback(amt->local_ip) || + ipv4_is_zeronet(amt->local_ip) || + ipv4_is_multicast(amt->local_ip)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_LOCAL_IP], + "Invalid Local address"); + goto err; + } + + if (data[IFLA_AMT_RELAY_PORT]) + amt->relay_port = nla_get_be16(data[IFLA_AMT_RELAY_PORT]); + else + amt->relay_port = htons(IANA_AMT_UDP_PORT); + + if (data[IFLA_AMT_GATEWAY_PORT]) + amt->gw_port = nla_get_be16(data[IFLA_AMT_GATEWAY_PORT]); + else + amt->gw_port = htons(IANA_AMT_UDP_PORT); + + if (!amt->relay_port) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_DISCOVERY_IP], + "relay port must not be 0"); + goto err; + } + if (amt->mode == AMT_MODE_RELAY) { + amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); + amt->qri = 10; + dev->needed_headroom = amt->stream_dev->needed_headroom + + AMT_RELAY_HLEN; + dev->mtu = amt->stream_dev->mtu - AMT_RELAY_HLEN; + dev->max_mtu = dev->mtu; + dev->min_mtu = ETH_MIN_MTU + AMT_RELAY_HLEN; + } else { + if (!data[IFLA_AMT_DISCOVERY_IP]) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_DISCOVERY_IP], + "discovery must be set in gateway mode"); + goto err; + } + if (!amt->gw_port) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_DISCOVERY_IP], + "gateway port must not be 0"); + goto err; + } + amt->remote_ip = 0; + amt->discovery_ip = nla_get_in_addr(data[IFLA_AMT_DISCOVERY_IP]); + if (ipv4_is_loopback(amt->discovery_ip) || + ipv4_is_zeronet(amt->discovery_ip) || + ipv4_is_multicast(amt->discovery_ip)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_AMT_DISCOVERY_IP], + "discovery must be unicast"); + goto err; + } + + dev->needed_headroom = amt->stream_dev->needed_headroom + + AMT_GW_HLEN; + dev->mtu = amt->stream_dev->mtu - AMT_GW_HLEN; + dev->max_mtu = dev->mtu; + dev->min_mtu = ETH_MIN_MTU + AMT_GW_HLEN; + } + amt->qi = AMT_INIT_QUERY_INTERVAL; + + err = register_netdevice(dev); + if (err < 0) { + netdev_dbg(dev, "failed to register new netdev %d\n", err); + goto err; + } + + err = netdev_upper_dev_link(amt->stream_dev, dev, extack); + if (err < 0) { + unregister_netdevice(dev); + goto err; + } + + INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work); + INIT_DELAYED_WORK(&amt->req_wq, amt_req_work); + INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work); + INIT_WORK(&amt->event_wq, amt_event_work); + INIT_LIST_HEAD(&amt->tunnel_list); + return 0; +err: + dev_put(amt->stream_dev); + return err; +} + +static void amt_dellink(struct net_device *dev, struct list_head *head) +{ + struct amt_dev *amt = netdev_priv(dev); + + unregister_netdevice_queue(dev, head); + netdev_upper_dev_unlink(amt->stream_dev, dev); + dev_put(amt->stream_dev); +} + +static size_t amt_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__u32)) + /* IFLA_AMT_MODE */ + nla_total_size(sizeof(__u16)) + /* IFLA_AMT_RELAY_PORT */ + nla_total_size(sizeof(__u16)) + /* IFLA_AMT_GATEWAY_PORT */ + nla_total_size(sizeof(__u32)) + /* IFLA_AMT_LINK */ + nla_total_size(sizeof(__u32)) + /* IFLA_MAX_TUNNELS */ + nla_total_size(sizeof(struct iphdr)) + /* IFLA_AMT_DISCOVERY_IP */ + nla_total_size(sizeof(struct iphdr)) + /* IFLA_AMT_REMOTE_IP */ + nla_total_size(sizeof(struct iphdr)); /* IFLA_AMT_LOCAL_IP */ +} + +static int amt_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct amt_dev *amt = netdev_priv(dev); + + if (nla_put_u32(skb, IFLA_AMT_MODE, amt->mode)) + goto nla_put_failure; + if (nla_put_be16(skb, IFLA_AMT_RELAY_PORT, amt->relay_port)) + goto nla_put_failure; + if (nla_put_be16(skb, IFLA_AMT_GATEWAY_PORT, amt->gw_port)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_AMT_LINK, amt->stream_dev->ifindex)) + goto nla_put_failure; + if (nla_put_in_addr(skb, IFLA_AMT_LOCAL_IP, amt->local_ip)) + goto nla_put_failure; + if (nla_put_in_addr(skb, IFLA_AMT_DISCOVERY_IP, amt->discovery_ip)) + goto nla_put_failure; + if (amt->remote_ip) + if (nla_put_in_addr(skb, IFLA_AMT_REMOTE_IP, amt->remote_ip)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_AMT_MAX_TUNNELS, amt->max_tunnels)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops amt_link_ops __read_mostly = { + .kind = "amt", + .maxtype = IFLA_AMT_MAX, + .policy = amt_policy, + .priv_size = sizeof(struct amt_dev), + .setup = amt_link_setup, + .validate = amt_validate, + .newlink = amt_newlink, + .dellink = amt_dellink, + .get_size = amt_get_size, + .fill_info = amt_fill_info, +}; + +static struct net_device *amt_lookup_upper_dev(struct net_device *dev) +{ + struct net_device *upper_dev; + struct amt_dev *amt; + + for_each_netdev(dev_net(dev), upper_dev) { + if (netif_is_amt(upper_dev)) { + amt = netdev_priv(upper_dev); + if (amt->stream_dev == dev) + return upper_dev; + } + } + + return NULL; +} + +static int amt_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_device *upper_dev; + struct amt_dev *amt; + LIST_HEAD(list); + int new_mtu; + + upper_dev = amt_lookup_upper_dev(dev); + if (!upper_dev) + return NOTIFY_DONE; + amt = netdev_priv(upper_dev); + + switch (event) { + case NETDEV_UNREGISTER: + amt_dellink(amt->dev, &list); + unregister_netdevice_many(&list); + break; + case NETDEV_CHANGEMTU: + if (amt->mode == AMT_MODE_RELAY) + new_mtu = dev->mtu - AMT_RELAY_HLEN; + else + new_mtu = dev->mtu - AMT_GW_HLEN; + + dev_set_mtu(amt->dev, new_mtu); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block amt_notifier_block __read_mostly = { + .notifier_call = amt_device_event, +}; + +static int __init amt_init(void) +{ + int err; + + err = register_netdevice_notifier(&amt_notifier_block); + if (err < 0) + goto err; + + err = rtnl_link_register(&amt_link_ops); + if (err < 0) + goto unregister_notifier; + + amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0); + if (!amt_wq) { + err = -ENOMEM; + goto rtnl_unregister; + } + + spin_lock_init(&source_gc_lock); + spin_lock_bh(&source_gc_lock); + INIT_DELAYED_WORK(&source_gc_wq, amt_source_gc_work); + mod_delayed_work(amt_wq, &source_gc_wq, + msecs_to_jiffies(AMT_GC_INTERVAL)); + spin_unlock_bh(&source_gc_lock); + + return 0; + +rtnl_unregister: + rtnl_link_unregister(&amt_link_ops); +unregister_notifier: + unregister_netdevice_notifier(&amt_notifier_block); +err: + pr_err("error loading AMT module loaded\n"); + return err; +} +late_initcall(amt_init); + +static void __exit amt_fini(void) +{ + rtnl_link_unregister(&amt_link_ops); + unregister_netdevice_notifier(&amt_notifier_block); + cancel_delayed_work_sync(&source_gc_wq); + __amt_source_gc_work(); + destroy_workqueue(amt_wq); +} +module_exit(amt_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); +MODULE_ALIAS_RTNL_LINK("amt"); |