author:    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 10:05:51 +0000
commit:    5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree:      a94efe259b9009378be6d90eb30d2b019d95c194 /net/bridge
parent:    Initial commit. (diff)
download:  linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
           linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
Adding upstream version 5.10.209.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/bridge')
61 files changed, 29299 insertions, 0 deletions
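The Kconfig file added below describes how the new options are intended to be used: CONFIG_BRIDGE built as a module named "bridge", IGMP/MLD snooping enabled by default, and VLAN filtering and MRP disabled by default. As an illustration only (not part of this commit), a kernel .config fragment that matches those defaults could look like:

    # illustrative .config fragment; symbol names and defaults taken from net/bridge/Kconfig below
    CONFIG_BRIDGE=m
    CONFIG_BRIDGE_IGMP_SNOOPING=y
    # CONFIG_BRIDGE_VLAN_FILTERING is not set
    # CONFIG_BRIDGE_MRP is not set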
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig new file mode 100644 index 000000000..808791965 --- /dev/null +++ b/net/bridge/Kconfig @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# 802.1d Ethernet Bridging +# + +config BRIDGE + tristate "802.1d Ethernet Bridging" + select LLC + select STP + depends on IPV6 || IPV6=n + help + If you say Y here, then your Linux box will be able to act as an + Ethernet bridge, which means that the different Ethernet segments it + is connected to will appear as one Ethernet to the participants. + Several such bridges can work together to create even larger + networks of Ethernets using the IEEE 802.1 spanning tree algorithm. + As this is a standard, Linux bridges will cooperate properly with + other third party bridge products. + + In order to use the Ethernet bridge, you'll need the bridge + configuration tools; see <file:Documentation/networking/bridge.rst> + for location. Please read the Bridge mini-HOWTO for more + information. + + If you enable iptables support along with the bridge support then you + turn your bridge into a bridging IP firewall. + iptables will then see the IP packets being bridged, so you need to + take this into account when setting up your firewall rules. + Enabling arptables support when bridging will let arptables see + bridged ARP traffic in the arptables FORWARD chain. + + To compile this code as a module, choose M here: the module + will be called bridge. + + If unsure, say N. + +config BRIDGE_IGMP_SNOOPING + bool "IGMP/MLD snooping" + depends on BRIDGE + depends on INET + default y + help + If you say Y here, then the Ethernet bridge will be able selectively + forward multicast traffic based on IGMP/MLD traffic received from + each port. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + help + If you say Y here, then the Ethernet bridge will be able selectively + receive and forward traffic based on VLAN information in the packet + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. + +config BRIDGE_MRP + bool "MRP protocol" + depends on BRIDGE + default n + help + If you say Y here, then the Ethernet bridge will be able to run MRP + protocol to detect loops + + Say N to exclude this support and reduce the binary size. + + If unsure, say N. diff --git a/net/bridge/Makefile b/net/bridge/Makefile new file mode 100644 index 000000000..ccb394236 --- /dev/null +++ b/net/bridge/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the IEEE 802.1d ethernet bridging layer. 
+# + +obj-$(CONFIG_BRIDGE) += bridge.o + +bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ + br_ioctl.o br_stp.o br_stp_bpdu.o \ + br_stp_if.o br_stp_timer.o br_netlink.o \ + br_netlink_tunnel.o br_arp_nd_proxy.o + +bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o + +bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o + +br_netfilter-y := br_netfilter_hooks.o +br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o +obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o + +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o + +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o + +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + +obj-$(CONFIG_NETFILTER) += netfilter/ + +bridge-$(CONFIG_BRIDGE_MRP) += br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o diff --git a/net/bridge/br.c b/net/bridge/br.c new file mode 100644 index 000000000..1b169f8e7 --- /dev/null +++ b/net/bridge/br.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Generic parts + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/init.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/stp.h> +#include <net/switchdev.h> + +#include "br_private.h" + +/* + * Handle changes in state of network devices enslaved to a bridge. + * + * Note: don't care about up/down if bridge itself is down, because + * port state is checked when bridge is brought up. + */ +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct netdev_notifier_pre_changeaddr_info *prechaddr_info; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + bool notified = false; + bool changed_addr; + int err; + + if (dev->priv_flags & IFF_EBRIDGE) { + err = br_vlan_bridge_event(dev, event, ptr); + if (err) + return notifier_from_errno(err); + + if (event == NETDEV_REGISTER) { + /* register of bridge completed, add sysfs entries */ + err = br_sysfs_addbr(dev); + if (err) + return notifier_from_errno(err); + + return NOTIFY_DONE; + } + } + + /* not a port of a bridge */ + p = br_port_get_rtnl(dev); + if (!p) + return NOTIFY_DONE; + + br = p->br; + + switch (event) { + case NETDEV_CHANGEMTU: + br_mtu_auto_adjust(br); + break; + + case NETDEV_PRE_CHANGEADDR: + if (br->dev->addr_assign_type == NET_ADDR_SET) + break; + prechaddr_info = ptr; + err = dev_pre_changeaddr_notify(br->dev, + prechaddr_info->dev_addr, + extack); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_CHANGEADDR: + spin_lock_bh(&br->lock); + br_fdb_changeaddr(p, dev->dev_addr); + changed_addr = br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + break; + + case NETDEV_CHANGE: + br_port_carrier_check(p, ¬ified); + break; + + case NETDEV_FEAT_CHANGE: + netdev_update_features(br->dev); + break; + + case NETDEV_DOWN: + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) { + br_stp_disable_port(p); + notified = true; + } + spin_unlock_bh(&br->lock); + break; + + case NETDEV_UP: + if (netif_running(br->dev) && netif_oper_up(dev)) { + spin_lock_bh(&br->lock); + br_stp_enable_port(p); + notified = true; + spin_unlock_bh(&br->lock); + } + 
break; + + case NETDEV_UNREGISTER: + br_del_if(br, dev); + break; + + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; + + case NETDEV_RESEND_IGMP: + /* Propagate to master device */ + call_netdevice_notifiers(event, br->dev); + break; + } + + if (event != NETDEV_UNREGISTER) + br_vlan_port_event(p, event); + + /* Events that may cause spanning tree to refresh */ + if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || + event == NETDEV_CHANGE || event == NETDEV_DOWN)) + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + return NOTIFY_DONE; +} + +static struct notifier_block br_device_notifier = { + .notifier_call = br_device_event +}; + +/* called with RTNL or RCU */ +static int br_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct switchdev_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + p = br_port_get_rtnl_rcu(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid, false); + if (err) { + err = notifier_from_errno(err); + break; + } + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid, true); + break; + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid, false); + if (err) + err = notifier_from_errno(err); + break; + case SWITCHDEV_FDB_OFFLOADED: + fdb_info = ptr; + br_fdb_offloaded_set(br, p, fdb_info->addr, + fdb_info->vid, fdb_info->offloaded); + break; + case SWITCHDEV_FDB_FLUSH_TO_BRIDGE: + fdb_info = ptr; + /* Don't delete static entries */ + br_fdb_delete_by_port(br, p, fdb_info->vid, 0); + break; + } + +out: + return err; +} + +static struct notifier_block br_switchdev_notifier = { + .notifier_call = br_switchdev_event, +}; + +/* br_boolopt_toggle - change user-controlled boolean option + * + * @br: bridge device + * @opt: id of the option to change + * @on: new option value + * @extack: extack for error messages + * + * Changes the value of the respective boolean option to @on taking care of + * any internal option value mapping and configuration. 
+ */ +int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, + struct netlink_ext_ack *extack) +{ + switch (opt) { + case BR_BOOLOPT_NO_LL_LEARN: + br_opt_toggle(br, BROPT_NO_LL_LEARN, on); + break; + default: + /* shouldn't be called with unsupported options */ + WARN_ON(1); + break; + } + + return 0; +} + +int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) +{ + switch (opt) { + case BR_BOOLOPT_NO_LL_LEARN: + return br_opt_get(br, BROPT_NO_LL_LEARN); + default: + /* shouldn't be called with unsupported options */ + WARN_ON(1); + break; + } + + return 0; +} + +int br_boolopt_multi_toggle(struct net_bridge *br, + struct br_boolopt_multi *bm, + struct netlink_ext_ack *extack) +{ + unsigned long bitmap = bm->optmask; + int err = 0; + int opt_id; + + for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) { + bool on = !!(bm->optval & BIT(opt_id)); + + err = br_boolopt_toggle(br, opt_id, on, extack); + if (err) { + br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n", + opt_id, br_boolopt_get(br, opt_id), on, err); + break; + } + } + + return err; +} + +void br_boolopt_multi_get(const struct net_bridge *br, + struct br_boolopt_multi *bm) +{ + u32 optval = 0; + int opt_id; + + for (opt_id = 0; opt_id < BR_BOOLOPT_MAX; opt_id++) + optval |= (br_boolopt_get(br, opt_id) << opt_id); + + bm->optval = optval; + bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0); +} + +/* private bridge options, controlled by the kernel */ +void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) +{ + bool cur = !!br_opt_get(br, opt); + + br_debug(br, "toggle option: %d state: %d -> %d\n", + opt, cur, on); + + if (cur == on) + return; + + if (on) + set_bit(opt, &br->options); + else + clear_bit(opt, &br->options); +} + +static void __net_exit br_net_exit(struct net *net) +{ + struct net_device *dev; + LIST_HEAD(list); + + rtnl_lock(); + for_each_netdev(net, dev) + if (dev->priv_flags & IFF_EBRIDGE) + br_dev_delete(dev, &list); + + unregister_netdevice_many(&list); + rtnl_unlock(); + +} + +static struct pernet_operations br_net_ops = { + .exit = br_net_exit, +}; + +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; + +static int __init br_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > sizeof_field(struct sk_buff, cb)); + + err = stp_proto_register(&br_stp_proto); + if (err < 0) { + pr_err("bridge: can't register sap for STP\n"); + return err; + } + + err = br_fdb_init(); + if (err) + goto err_out; + + err = register_pernet_subsys(&br_net_ops); + if (err) + goto err_out1; + + err = br_nf_core_init(); + if (err) + goto err_out2; + + err = register_netdevice_notifier(&br_device_notifier); + if (err) + goto err_out3; + + err = register_switchdev_notifier(&br_switchdev_notifier); + if (err) + goto err_out4; + + err = br_netlink_init(); + if (err) + goto err_out5; + + brioctl_set(br_ioctl_deviceless_stub); + +#if IS_ENABLED(CONFIG_ATM_LANE) + br_fdb_test_addr_hook = br_fdb_test_addr; +#endif + +#if IS_MODULE(CONFIG_BRIDGE_NETFILTER) + pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " + "by default. 
Update your scripts to load br_netfilter if you " + "need this.\n"); +#endif + + return 0; + +err_out5: + unregister_switchdev_notifier(&br_switchdev_notifier); +err_out4: + unregister_netdevice_notifier(&br_device_notifier); +err_out3: + br_nf_core_fini(); +err_out2: + unregister_pernet_subsys(&br_net_ops); +err_out1: + br_fdb_fini(); +err_out: + stp_proto_unregister(&br_stp_proto); + return err; +} + +static void __exit br_deinit(void) +{ + stp_proto_unregister(&br_stp_proto); + br_netlink_fini(); + unregister_switchdev_notifier(&br_switchdev_notifier); + unregister_netdevice_notifier(&br_device_notifier); + brioctl_set(NULL); + unregister_pernet_subsys(&br_net_ops); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ + + br_nf_core_fini(); +#if IS_ENABLED(CONFIG_ATM_LANE) + br_fdb_test_addr_hook = NULL; +#endif + br_fdb_fini(); +} + +module_init(br_init) +module_exit(br_deinit) +MODULE_LICENSE("GPL"); +MODULE_VERSION(BR_VERSION); +MODULE_ALIAS_RTNL_LINK("bridge"); diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c new file mode 100644 index 000000000..3db1def44 --- /dev/null +++ b/net/bridge/br_arp_nd_proxy.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handle bridge arp/nd proxy/suppress + * + * Copyright (C) 2017 Cumulus Networks + * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com> + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/neighbour.h> +#include <net/arp.h> +#include <linux/if_vlan.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <net/ipv6_stubs.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + +#include "br_private.h" + +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool neigh_suppress = false; + + list_for_each_entry(p, &br->port_list, list) { + if (p->flags & BR_NEIGH_SUPPRESS) { + neigh_suppress = true; + break; + } + } + + br_opt_toggle(br, BROPT_NEIGH_SUPPRESS_ENABLED, neigh_suppress); +} + +#if IS_ENABLED(CONFIG_INET) +static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, + struct net_device *dev, __be32 dest_ip, __be32 src_ip, + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw, + __be16 vlan_proto, u16 vlan_tci) +{ + struct net_bridge_vlan_group *vg; + struct sk_buff *skb; + u16 pvid; + + netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n", + dev->name, &dest_ip, dest_hw, &src_ip, src_hw); + + if (!vlan_tci) { + arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + return; + } + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + if (!skb) + return; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + + if (p) { + arp_xmit(skb); + } else { + skb_reset_mac_header(skb); + __skb_pull(skb, skb_network_offset(skb)); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_HOST; + + netif_rx_ni(skb); + } +} + +static int br_chk_addr_ip(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + __be32 ip = *(__be32 *)priv->data; + struct in_device *in_dev; + __be32 addr = 0; + + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + addr 
= inet_confirm_addr(dev_net(dev), in_dev, 0, ip, + RT_SCOPE_HOST); + + if (addr == ip) + return 1; + + return 0; +} + +static bool br_is_local_ip(struct net_device *dev, __be32 ip) +{ + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; + + if (br_chk_addr_ip(dev, &priv)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv)) + return true; + + return false; +} + +void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = dev; + struct neighbour *n; + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; + + if ((dev->flags & IFF_NOARP) || + !pskb_may_pull(skb, arp_hdr_len(dev))) + return; + + parp = arp_hdr(skb); + + if (parp->ar_pro != htons(ETH_P_IP) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + return; + + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + return; + + if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + if (parp->ar_op != htons(ARPOP_RREQUEST) && + parp->ar_op != htons(ARPOP_RREPLY) && + (ipv4_is_zeronet(sip) || sip == tip)) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + return; + } + } + + if (parp->ar_op != htons(ARPOP_REQUEST)) + return; + + if (vid != 0) { + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } + + if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && + br_is_local_ip(vlandev, tip)) { + /* its our local ip, so don't proxy reply + * and don't forward to neigh suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + return; + } + + n = neigh_lookup(&arp_tbl, &tip, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if ((p && (p->flags & BR_PROXYARP)) || + (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI | + BR_NEIGH_SUPPRESS)))) { + if (!vid) + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, 0, 0); + else + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, + skb->vlan_proto, + skb_vlan_tag_get(skb)); + replied = true; + } + + /* If we have replied or as long as we know the + * mac, indicate to arp replied + */ + if (replied || + br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + } + + neigh_release(n); + } +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg) +{ + struct nd_msg *m; + + m = skb_header_pointer(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), sizeof(*msg), msg); + if (!m) + return NULL; + + if (m->icmph.icmp6_code != 0 || + (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && + m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) + return NULL; + + return m; +} + +static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, + struct sk_buff *request, struct neighbour *n, + __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) +{ + struct net_device *dev = request->dev; + 
struct net_bridge_vlan_group *vg; + struct sk_buff *reply; + struct nd_msg *na; + struct ipv6hdr *pip6; + int na_olen = 8; /* opt hdr + ETH_ALEN for target */ + int ns_olen; + int i, len; + u8 *daddr; + u16 pvid; + + if (!dev) + return; + + len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + + sizeof(*na) + na_olen + dev->needed_tailroom; + + reply = alloc_skb(len, GFP_ATOMIC); + if (!reply) + return; + + reply->protocol = htons(ETH_P_IPV6); + reply->dev = dev; + skb_reserve(reply, LL_RESERVED_SPACE(dev)); + skb_push(reply, sizeof(struct ethhdr)); + skb_set_mac_header(reply, 0); + + daddr = eth_hdr(request)->h_source; + + /* Do we need option processing ? */ + ns_olen = request->len - (skb_network_offset(request) + + sizeof(struct ipv6hdr)) - sizeof(*ns); + for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { + if (!ns->opt[i + 1]) { + kfree_skb(reply); + return; + } + if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + break; + } + } + + /* Ethernet header */ + ether_addr_copy(eth_hdr(reply)->h_dest, daddr); + ether_addr_copy(eth_hdr(reply)->h_source, n->ha); + eth_hdr(reply)->h_proto = htons(ETH_P_IPV6); + reply->protocol = htons(ETH_P_IPV6); + + skb_pull(reply, sizeof(struct ethhdr)); + skb_set_network_header(reply, 0); + skb_put(reply, sizeof(struct ipv6hdr)); + + /* IPv6 header */ + pip6 = ipv6_hdr(reply); + memset(pip6, 0, sizeof(struct ipv6hdr)); + pip6->version = 6; + pip6->priority = ipv6_hdr(request)->priority; + pip6->nexthdr = IPPROTO_ICMPV6; + pip6->hop_limit = 255; + pip6->daddr = ipv6_hdr(request)->saddr; + pip6->saddr = *(struct in6_addr *)n->primary_key; + + skb_pull(reply, sizeof(struct ipv6hdr)); + skb_set_transport_header(reply, 0); + + na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); + + /* Neighbor Advertisement */ + memset(na, 0, sizeof(*na) + na_olen); + na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + na->icmph.icmp6_router = (n->flags & NTF_ROUTER) ? 
1 : 0; + na->icmph.icmp6_override = 1; + na->icmph.icmp6_solicited = 1; + na->target = ns->target; + ether_addr_copy(&na->opt[2], n->ha); + na->opt[0] = ND_OPT_TARGET_LL_ADDR; + na->opt[1] = na_olen >> 3; + + na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr, + &pip6->daddr, + sizeof(*na) + na_olen, + IPPROTO_ICMPV6, + csum_partial(na, sizeof(*na) + na_olen, 0)); + + pip6->payload_len = htons(sizeof(*na) + na_olen); + + skb_push(reply, sizeof(struct ipv6hdr)); + skb_push(reply, sizeof(struct ethhdr)); + + reply->ip_summed = CHECKSUM_UNNECESSARY; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci); + + netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n", + dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha); + + if (p) { + dev_queue_xmit(reply); + } else { + skb_reset_mac_header(reply); + __skb_pull(reply, skb_network_offset(reply)); + reply->ip_summed = CHECKSUM_UNNECESSARY; + reply->pkt_type = PACKET_HOST; + + netif_rx_ni(reply); + } +} + +static int br_chk_addr_ip6(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct in6_addr *addr = (struct in6_addr *)priv->data; + + if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) + return 1; + + return 0; +} + +static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) + +{ + struct netdev_nested_priv priv = { + .data = (void *)addr, + }; + + if (br_chk_addr_ip6(dev, &priv)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv)) + return true; + + return false; +} + +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = NULL; + struct in6_addr *saddr, *daddr; + struct ipv6hdr *iphdr; + struct neighbour *n; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; + + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + + if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && + !msg->icmph.icmp6_solicited) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + return; + } + + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + iphdr = ipv6_hdr(skb); + saddr = &iphdr->saddr; + daddr = &iphdr->daddr; + + if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + return; + } + + if (vid != 0) { + /* build neigh table lookup on the vlan device */ + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } else { + vlandev = dev; + } + + if (br_is_local_ip6(vlandev, &msg->target)) { + /* its our own ip, so don't proxy reply + * and don't forward to arp suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + return; + } + + n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) { + if (vid != 0) + br_nd_send(br, p, skb, n, + skb->vlan_proto, + skb_vlan_tag_get(skb), msg); + else + br_nd_send(br, p, skb, n, 0, 0, msg); + replied = true; + } + + /* If we 
have replied or as long as we know the + * mac, indicate to NEIGH_SUPPRESS ports that we + * have replied + */ + if (replied || + br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; + } + neigh_release(n); + } +} +#endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c new file mode 100644 index 000000000..d3ea9d077 --- /dev/null +++ b/net/bridge/br_device.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Device handling code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/netpoll.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/list.h> +#include <linux/netfilter_bridge.h> + +#include <linux/uaccess.h> +#include "br_private.h" + +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + +const struct nf_br_ops __rcu *nf_br_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_br_ops); + +/* net device transmit always called with BH disabled */ +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + const struct nf_br_ops *nf_ops; + u8 state = BR_STATE_FORWARDING; + const unsigned char *dest; + u16 vid = 0; + + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + + rcu_read_lock(); + nf_ops = rcu_dereference(nf_br_ops); + if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { + rcu_read_unlock(); + return NETDEV_TX_OK; + } + + u64_stats_update_begin(&brstats->syncp); + brstats->tx_packets++; + brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); + + br_switchdev_frame_unmark(skb); + BR_INPUT_SKB_CB(skb)->brdev = dev; + BR_INPUT_SKB_CB(skb)->frag_max_size = 0; + + skb_reset_mac_header(skb); + skb_pull(skb, ETH_HLEN); + + if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state)) + goto out; + + if (IS_ENABLED(CONFIG_INET) && + (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) || + eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && + br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { + br_do_proxy_suppress_arp(skb, br, vid, NULL); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, NULL, msg); + } + + dest = eth_hdr(skb)->h_dest; + if (is_broadcast_ether_addr(dest)) { + br_flood(br, skb, BR_PKT_BROADCAST, false, true); + } else if (is_multicast_ether_addr(dest)) { + if (unlikely(netpoll_tx_running(dev))) { + br_flood(br, skb, BR_PKT_MULTICAST, false, true); + goto out; + } + if (br_multicast_rcv(br, NULL, skb, vid)) { + kfree_skb(skb); + goto out; + } + + mdst = br_mdb_get(br, skb, vid); + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br, eth_hdr(skb))) + br_multicast_flood(mdst, skb, false, true); + else + br_flood(br, skb, BR_PKT_MULTICAST, false, true); + } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { + br_forward(dst->dst, skb, false, true); + } else { + br_flood(br, skb, BR_PKT_UNICAST, false, true); + } +out: + rcu_read_unlock(); + return NETDEV_TX_OK; +} + 
+static struct lock_class_key bridge_netdev_addr_lock_key; + +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + +static int br_dev_init(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!br->stats) + return -ENOMEM; + + err = br_fdb_hash_init(br); + if (err) { + free_percpu(br->stats); + return err; + } + + err = br_mdb_hash_init(br); + if (err) { + free_percpu(br->stats); + br_fdb_hash_fini(br); + return err; + } + + err = br_vlan_init(br); + if (err) { + free_percpu(br->stats); + br_mdb_hash_fini(br); + br_fdb_hash_fini(br); + return err; + } + + err = br_multicast_init_stats(br); + if (err) { + free_percpu(br->stats); + br_vlan_flush(br); + br_mdb_hash_fini(br); + br_fdb_hash_fini(br); + } + + br_set_lockdep_class(dev); + return err; +} + +static void br_dev_uninit(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br_multicast_dev_del(br); + br_multicast_uninit_stats(br); + br_vlan_flush(br); + br_mdb_hash_fini(br); + br_fdb_hash_fini(br); + free_percpu(br->stats); +} + +static int br_dev_open(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + netdev_update_features(dev); + netif_start_queue(dev); + br_stp_enable_bridge(br); + br_multicast_open(br); + + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + + return 0; +} + +static void br_dev_set_multicast_list(struct net_device *dev) +{ +} + +static void br_dev_change_rx_flags(struct net_device *dev, int change) +{ + if (change & IFF_PROMISC) + br_manage_promisc(netdev_priv(dev)); +} + +static int br_dev_stop(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br_stp_disable_bridge(br); + br_multicast_stop(br); + + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); + + netif_stop_queue(dev); + + return 0; +} + +static void br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct net_bridge *br = netdev_priv(dev); + + netdev_stats_to_stats64(stats, &dev->stats); + dev_fetch_sw_netstats(stats, br->stats); +} + +static int br_change_mtu(struct net_device *dev, int new_mtu) +{ + struct net_bridge *br = netdev_priv(dev); + + dev->mtu = new_mtu; + + /* this flag will be cleared if the MTU was automatically adjusted */ + br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true); +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* remember the MTU in the rtable for PMTU */ + dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); +#endif + + return 0; +} + +/* Allow setting mac address to any valid ethernet address. */ +static int br_set_mac_address(struct net_device *dev, void *p) +{ + struct net_bridge *br = netdev_priv(dev); + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + /* dev_set_mac_addr() can be called by a master device on bridge's + * NETDEV_UNREGISTER, but since it's being destroyed do nothing + */ + if (dev->reg_state != NETREG_REGISTERED) + return -EBUSY; + + spin_lock_bh(&br->lock); + if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { + /* Mac address will be changed in br_stp_change_bridge_id(). 
*/ + br_stp_change_bridge_id(br, addr->sa_data); + } + spin_unlock_bh(&br->lock); + + return 0; +} + +static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, "bridge", sizeof(info->driver)); + strlcpy(info->version, BR_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); +} + +static int br_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.port = PORT_OTHER; + cmd->base.speed = SPEED_UNKNOWN; + + list_for_each_entry(p, &br->port_list, list) { + struct ethtool_link_ksettings ecmd; + struct net_device *pdev = p->dev; + + if (!netif_running(pdev) || !netif_oper_up(pdev)) + continue; + + if (__ethtool_get_link_ksettings(pdev, &ecmd)) + continue; + + if (ecmd.base.speed == (__u32)SPEED_UNKNOWN) + continue; + + if (cmd->base.speed == (__u32)SPEED_UNKNOWN || + cmd->base.speed < ecmd.base.speed) + cmd->base.speed = ecmd.base.speed; + } + + return 0; +} + +static netdev_features_t br_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_features_recompute(br, features); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void br_poll_controller(struct net_device *br_dev) +{ +} + +static void br_netpoll_cleanup(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) + br_netpoll_disable(p); +} + +static int __br_netpoll_enable(struct net_bridge_port *p) +{ + struct netpoll *np; + int err; + + np = kzalloc(sizeof(*p->np), GFP_KERNEL); + if (!np) + return -ENOMEM; + + err = __netpoll_setup(np, p->dev); + if (err) { + kfree(np); + return err; + } + + p->np = np; + return err; +} + +int br_netpoll_enable(struct net_bridge_port *p) +{ + if (!p->br->dev->npinfo) + return 0; + + return __br_netpoll_enable(p); +} + +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + int err = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (!p->dev) + continue; + err = __br_netpoll_enable(p); + if (err) + goto fail; + } + +out: + return err; + +fail: + br_netpoll_cleanup(dev); + goto out; +} + +void br_netpoll_disable(struct net_bridge_port *p) +{ + struct netpoll *np = p->np; + + if (!np) + return; + + p->np = NULL; + + __netpoll_free(np); +} + +#endif + +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) + +{ + struct net_bridge *br = netdev_priv(dev); + + return br_add_if(br, slave_dev, extack); +} + +static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_del_if(br, slave_dev); +} + +static const struct ethtool_ops br_ethtool_ops = { + .get_drvinfo = br_getinfo, + .get_link = ethtool_op_get_link, + .get_link_ksettings = br_get_link_ksettings, +}; + +static const struct net_device_ops br_netdev_ops = { + .ndo_open = br_dev_open, + .ndo_stop = br_dev_stop, + .ndo_init = br_dev_init, + .ndo_uninit = br_dev_uninit, + .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats64 = br_get_stats64, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_rx_mode = br_dev_set_multicast_list, + .ndo_change_rx_flags = 
br_dev_change_rx_flags, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_setup = br_netpoll_setup, + .ndo_netpoll_cleanup = br_netpoll_cleanup, + .ndo_poll_controller = br_poll_controller, +#endif + .ndo_add_slave = br_add_slave, + .ndo_del_slave = br_del_slave, + .ndo_fix_features = br_fix_features, + .ndo_fdb_add = br_fdb_add, + .ndo_fdb_del = br_fdb_delete, + .ndo_fdb_dump = br_fdb_dump, + .ndo_fdb_get = br_fdb_get, + .ndo_bridge_getlink = br_getlink, + .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, + .ndo_features_check = passthru_features_check, +}; + +static struct device_type br_type = { + .name = "bridge", +}; + +void br_dev_setup(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + eth_hw_addr_random(dev); + ether_setup(dev); + + dev->netdev_ops = &br_netdev_ops; + dev->needs_free_netdev = true; + dev->ethtool_ops = &br_ethtool_ops; + SET_NETDEV_DEVTYPE(dev, &br_type); + dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; + + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->vlan_features = COMMON_FEATURES; + + br->dev = dev; + spin_lock_init(&br->lock); + INIT_LIST_HEAD(&br->port_list); + INIT_HLIST_HEAD(&br->fdb_list); +#if IS_ENABLED(CONFIG_BRIDGE_MRP) + INIT_LIST_HEAD(&br->mrp_list); +#endif + spin_lock_init(&br->hash_lock); + + br->bridge_id.prio[0] = 0x80; + br->bridge_id.prio[1] = 0x00; + + ether_addr_copy(br->group_addr, eth_stp_addr); + + br->stp_enabled = BR_NO_STP; + br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + + br->designated_root = br->bridge_id; + br->bridge_max_age = br->max_age = 20 * HZ; + br->bridge_hello_time = br->hello_time = 2 * HZ; + br->bridge_forward_delay = br->forward_delay = 15 * HZ; + br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME; + dev->max_mtu = ETH_MAX_MTU; + + br_netfilter_rtable_init(br); + br_stp_timer_init(br); + br_multicast_init(br); + INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup); +} diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c new file mode 100644 index 000000000..8a6470a21 --- /dev/null +++ b/net/bridge/br_fdb.c @@ -0,0 +1,1318 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Forwarding database + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/spinlock.h> +#include <linux/times.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/jhash.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <asm/unaligned.h> +#include <linux/if_vlan.h> +#include <net/switchdev.h> +#include <trace/events/bridge.h> +#include "br_private.h" + +static const struct rhashtable_params br_fdb_rht_params = { + .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode), + .key_offset = offsetof(struct net_bridge_fdb_entry, key), + .key_len = sizeof(struct net_bridge_fdb_key), + .automatic_shrinking = true, +}; + +static struct kmem_cache *br_fdb_cache __read_mostly; +static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid); +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *, int, bool); + +int __init 
br_fdb_init(void) +{ + br_fdb_cache = kmem_cache_create("bridge_fdb_cache", + sizeof(struct net_bridge_fdb_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!br_fdb_cache) + return -ENOMEM; + + return 0; +} + +void br_fdb_fini(void) +{ + kmem_cache_destroy(br_fdb_cache); +} + +int br_fdb_hash_init(struct net_bridge *br) +{ + return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params); +} + +void br_fdb_hash_fini(struct net_bridge *br) +{ + rhashtable_destroy(&br->fdb_hash_tbl); +} + +/* if topology_changing then use forward_delay (default 15 sec) + * otherwise keep longer (default 5 minutes) + */ +static inline unsigned long hold_time(const struct net_bridge *br) +{ + return br->topology_change ? br->forward_delay : br->ageing_time; +} + +static inline int has_expired(const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb) +{ + return !test_bit(BR_FDB_STATIC, &fdb->flags) && + !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) && + time_before_eq(fdb->updated + hold_time(br), jiffies); +} + +static void fdb_rcu_free(struct rcu_head *head) +{ + struct net_bridge_fdb_entry *ent + = container_of(head, struct net_bridge_fdb_entry, rcu); + kmem_cache_free(br_fdb_cache, ent); +} + +static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_key key; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + key.vlan_id = vid; + memcpy(key.addr.addr, addr, sizeof(key.addr.addr)); + + return rhashtable_lookup(tbl, &key, br_fdb_rht_params); +} + +/* requires bridge hash_lock */ +static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *fdb; + + lockdep_assert_held_once(&br->hash_lock); + + rcu_read_lock(); + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); + rcu_read_unlock(); + + return fdb; +} + +struct net_device *br_fdb_find_port(const struct net_device *br_dev, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *f; + struct net_device *dev = NULL; + struct net_bridge *br; + + ASSERT_RTNL(); + + if (!netif_is_bridge_master(br_dev)) + return NULL; + + br = netdev_priv(br_dev); + rcu_read_lock(); + f = br_fdb_find_rcu(br, addr, vid); + if (f && f->dst) + dev = f->dst->dev; + rcu_read_unlock(); + + return dev; +} +EXPORT_SYMBOL_GPL(br_fdb_find_port); + +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); +} + +/* When a static FDB entry is added, the mac address from the entry is + * added to the bridge private HW address list and all required ports + * are then updated with the new information. + * Called under RTNL. + */ +static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr) +{ + int err; + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) { + err = dev_uc_add(p->dev, addr); + if (err) + goto undo; + } + } + + return; +undo: + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } +} + +/* When a static FDB entry is deleted, the HW address from that entry is + * also removed from the bridge private HW address list and updates all + * the ports with needed information. + * Called under RTNL. 
+ */ +static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) +{ + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } +} + +static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, + bool swdev_notify) +{ + trace_fdb_delete(br, f); + + if (test_bit(BR_FDB_STATIC, &f->flags)) + fdb_del_hw_addr(br, f->key.addr.addr); + + hlist_del_init_rcu(&f->fdb_node); + rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, + br_fdb_rht_params); + fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); + call_rcu(&f->rcu, fdb_rcu_free); +} + +/* Delete a local entry if no other port had the same address. */ +static void fdb_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_fdb_entry *f) +{ + const unsigned char *addr = f->key.addr.addr; + struct net_bridge_vlan_group *vg; + const struct net_bridge_vlan *v; + struct net_bridge_port *op; + u16 vid = f->key.vlan_id; + + /* Maybe another port has same hw addr? */ + list_for_each_entry(op, &br->port_list, list) { + vg = nbp_vlan_group(op); + if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && + (!vid || br_vlan_find(vg, vid))) { + f->dst = op; + clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); + return; + } + } + + vg = br_vlan_group(br); + v = br_vlan_find(vg, vid); + /* Maybe bridge device has same hw addr? */ + if (p && ether_addr_equal(br->dev->dev_addr, addr) && + (!vid || (v && br_vlan_should_use(v)))) { + f->dst = NULL; + clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); + return; + } + + fdb_delete(br, f, true); +} + +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_fdb_entry *f; + + spin_lock_bh(&br->hash_lock); + f = br_fdb_find(br, addr, vid); + if (f && test_bit(BR_FDB_LOCAL, &f->flags) && + !test_bit(BR_FDB_ADDED_BY_USER, &f->flags) && f->dst == p) + fdb_delete_local(br, p, f); + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_fdb_entry *f; + struct net_bridge *br = p->br; + struct net_bridge_vlan *v; + + spin_lock_bh(&br->hash_lock); + vg = nbp_vlan_group(p); + hlist_for_each_entry(f, &br->fdb_list, fdb_node) { + if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) && + !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) { + /* delete old one */ + fdb_delete_local(br, p, f); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (!vg || !vg->num_vlans) + goto insert; + } + } + +insert: + /* insert new address, may fail if invalid address or dup. */ + fdb_insert(br, p, newaddr, 0); + + if (!vg || !vg->num_vlans) + goto done; + + /* Now add entries for every VLAN configured on the port. + * This function runs under RTNL so the bitmap will not change + * from under us. + */ + list_for_each_entry(v, &vg->vlan_list, vlist) + fdb_insert(br, p, newaddr, v->vid); + +done: + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_fdb_entry *f; + struct net_bridge_vlan *v; + + spin_lock_bh(&br->hash_lock); + + /* If old entry was unassociated with any port, then delete it. 
*/ + f = br_fdb_find(br, br->dev->dev_addr, 0); + if (f && test_bit(BR_FDB_LOCAL, &f->flags) && + !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) + fdb_delete_local(br, NULL, f); + + fdb_insert(br, NULL, newaddr, 0); + vg = br_vlan_group(br); + if (!vg || !vg->num_vlans) + goto out; + /* Now remove and add entries for every VLAN configured on the + * bridge. This function runs under RTNL so the bitmap will not + * change from under us. + */ + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + f = br_fdb_find(br, br->dev->dev_addr, v->vid); + if (f && test_bit(BR_FDB_LOCAL, &f->flags) && + !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) + fdb_delete_local(br, NULL, f); + fdb_insert(br, NULL, newaddr, v->vid); + } +out: + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_cleanup(struct work_struct *work) +{ + struct net_bridge *br = container_of(work, struct net_bridge, + gc_work.work); + struct net_bridge_fdb_entry *f = NULL; + unsigned long delay = hold_time(br); + unsigned long work_delay = delay; + unsigned long now = jiffies; + + /* this part is tricky, in order to avoid blocking learning and + * consequently forwarding, we rely on rcu to delete objects with + * delayed freeing allowing us to continue traversing + */ + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + unsigned long this_timer = f->updated + delay; + + if (test_bit(BR_FDB_STATIC, &f->flags) || + test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) { + if (test_bit(BR_FDB_NOTIFY, &f->flags)) { + if (time_after(this_timer, now)) + work_delay = min(work_delay, + this_timer - now); + else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, + &f->flags)) + fdb_notify(br, f, RTM_NEWNEIGH, false); + } + continue; + } + + if (time_after(this_timer, now)) { + work_delay = min(work_delay, this_timer - now); + } else { + spin_lock_bh(&br->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) + fdb_delete(br, f, true); + spin_unlock_bh(&br->hash_lock); + } + } + rcu_read_unlock(); + + /* Cleanup minimum 10 milliseconds apart */ + work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); + mod_delayed_work(system_long_wq, &br->gc_work, work_delay); +} + +/* Completely flush all dynamic entries in forwarding database.*/ +void br_fdb_flush(struct net_bridge *br) +{ + struct net_bridge_fdb_entry *f; + struct hlist_node *tmp; + + spin_lock_bh(&br->hash_lock); + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { + if (!test_bit(BR_FDB_STATIC, &f->flags)) + fdb_delete(br, f, true); + } + spin_unlock_bh(&br->hash_lock); +} + +/* Flush all entries referring to a specific port. 
+ * if do_all is set also flush static entries + * if vid is set delete all entries that match the vlan_id + */ +void br_fdb_delete_by_port(struct net_bridge *br, + const struct net_bridge_port *p, + u16 vid, + int do_all) +{ + struct net_bridge_fdb_entry *f; + struct hlist_node *tmp; + + spin_lock_bh(&br->hash_lock); + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { + if (f->dst != p) + continue; + + if (!do_all) + if (test_bit(BR_FDB_STATIC, &f->flags) || + (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && + !test_bit(BR_FDB_OFFLOADED, &f->flags)) || + (vid && f->key.vlan_id != vid)) + continue; + + if (test_bit(BR_FDB_LOCAL, &f->flags)) + fdb_delete_local(br, p, f); + else + fdb_delete(br, f, true); + } + spin_unlock_bh(&br->hash_lock); +} + +#if IS_ENABLED(CONFIG_ATM_LANE) +/* Interface used by ATM LANE hook to test + * if an addr is on some other bridge port */ +int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) +{ + struct net_bridge_fdb_entry *fdb; + struct net_bridge_port *port; + int ret; + + rcu_read_lock(); + port = br_port_get_rcu(dev); + if (!port) + ret = 0; + else { + fdb = br_fdb_find_rcu(port->br, addr, 0); + ret = fdb && fdb->dst && fdb->dst->dev != dev && + fdb->dst->state == BR_STATE_FORWARDING; + } + rcu_read_unlock(); + + return ret; +} +#endif /* CONFIG_ATM_LANE */ + +/* + * Fill buffer with forwarding table records in + * the API format. + */ +int br_fdb_fillbuf(struct net_bridge *br, void *buf, + unsigned long maxnum, unsigned long skip) +{ + struct net_bridge_fdb_entry *f; + struct __fdb_entry *fe = buf; + int num = 0; + + memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); + + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (num >= maxnum) + break; + + if (has_expired(br, f)) + continue; + + /* ignore pseudo entry for local MAC address */ + if (!f->dst) + continue; + + if (skip) { + --skip; + continue; + } + + /* convert from internal format to API */ + memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); + + /* due to ABI compat need to split into hi/lo */ + fe->port_no = f->dst->port_no; + fe->port_hi = f->dst->port_no >> 8; + + fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags); + if (!test_bit(BR_FDB_STATIC, &f->flags)) + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); + ++fe; + ++num; + } + rcu_read_unlock(); + + return num; +} + +static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, + struct net_bridge_port *source, + const unsigned char *addr, + __u16 vid, + unsigned long flags) +{ + struct net_bridge_fdb_entry *fdb; + + fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); + if (fdb) { + memcpy(fdb->key.addr.addr, addr, ETH_ALEN); + fdb->dst = source; + fdb->key.vlan_id = vid; + fdb->flags = flags; + fdb->updated = fdb->used = jiffies; + if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, + &fdb->rhnode, + br_fdb_rht_params)) { + kmem_cache_free(br_fdb_cache, fdb); + fdb = NULL; + } else { + hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); + } + } + return fdb; +} + +static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_fdb_entry *fdb; + + if (!is_valid_ether_addr(addr)) + return -EINVAL; + + fdb = br_fdb_find(br, addr, vid); + if (fdb) { + /* it is okay to have multiple ports with same + * address, just use the first one. 
+ */ + if (test_bit(BR_FDB_LOCAL, &fdb->flags)) + return 0; + br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", + source ? source->dev->name : br->dev->name, addr, vid); + fdb_delete(br, fdb, true); + } + + fdb = fdb_create(br, source, addr, vid, + BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC)); + if (!fdb) + return -ENOMEM; + + fdb_add_hw_addr(br, addr); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); + return 0; +} + +/* Add entry for local address of interface */ +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid) +{ + int ret; + + spin_lock_bh(&br->hash_lock); + ret = fdb_insert(br, source, addr, vid); + spin_unlock_bh(&br->hash_lock); + return ret; +} + +/* returns true if the fdb was modified */ +static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb) +{ + return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) && + test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)); +} + +void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, unsigned long flags) +{ + struct net_bridge_fdb_entry *fdb; + + /* some users want to always flood. */ + if (hold_time(br) == 0) + return; + + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); + if (likely(fdb)) { + /* attempt to update an entry for a local interface */ + if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) { + if (net_ratelimit()) + br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", + source->dev->name, addr, vid); + } else { + unsigned long now = jiffies; + bool fdb_modified = false; + + if (now != fdb->updated) { + fdb->updated = now; + fdb_modified = __fdb_mark_active(fdb); + } + + /* fastpath: update of existing entry */ + if (unlikely(source != fdb->dst && + !test_bit(BR_FDB_STICKY, &fdb->flags))) { + fdb->dst = source; + fdb_modified = true; + /* Take over HW learned entry */ + if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN, + &fdb->flags))) + clear_bit(BR_FDB_ADDED_BY_EXT_LEARN, + &fdb->flags); + } + + if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (unlikely(fdb_modified)) { + trace_br_fdb_update(br, source, addr, vid, flags); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); + } + } + } else { + spin_lock(&br->hash_lock); + fdb = fdb_create(br, source, addr, vid, flags); + if (fdb) { + trace_br_fdb_update(br, source, addr, vid, flags); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); + } + /* else we lose race and someone else inserts + * it first, don't bother updating + */ + spin_unlock(&br->hash_lock); + } +} + +static int fdb_to_nud(const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb) +{ + if (test_bit(BR_FDB_LOCAL, &fdb->flags)) + return NUD_PERMANENT; + else if (test_bit(BR_FDB_STATIC, &fdb->flags)) + return NUD_NOARP; + else if (has_expired(br, fdb)) + return NUD_STALE; + else + return NUD_REACHABLE; +} + +static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb, + u32 portid, u32 seq, int type, unsigned int flags) +{ + unsigned long now = jiffies; + struct nda_cacheinfo ci; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = 0; + ndm->ndm_type = 0; + ndm->ndm_ifindex = fdb->dst ? 
fdb->dst->dev->ifindex : br->dev->ifindex; + ndm->ndm_state = fdb_to_nud(br, fdb); + + if (test_bit(BR_FDB_OFFLOADED, &fdb->flags)) + ndm->ndm_flags |= NTF_OFFLOADED; + if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) + ndm->ndm_flags |= NTF_EXT_LEARNED; + if (test_bit(BR_FDB_STICKY, &fdb->flags)) + ndm->ndm_flags |= NTF_STICKY; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr)) + goto nla_put_failure; + if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) + goto nla_put_failure; + ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_confirmed = 0; + ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_refcnt = 0; + if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; + + if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), + &fdb->key.vlan_id)) + goto nla_put_failure; + + if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) { + struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS); + u8 notify_bits = FDB_NOTIFY_BIT; + + if (!nest) + goto nla_put_failure; + if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) + notify_bits |= FDB_NOTIFY_INACTIVE_BIT; + + if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) { + nla_nest_cancel(skb, nest); + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + } + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t fdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */ + + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */ +} + +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb, int type, + bool swdev_notify) +{ + struct net *net = dev_net(br->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + if (swdev_notify) + br_switchdev_fdb_notify(fdb, type); + + skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +} + +/* Dump information about entries, in response to GETNEIGH */ +int br_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, + int *idx) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_fdb_entry *f; + int err = 0; + + if (!(dev->priv_flags & IFF_EBRIDGE)) + return err; + + if (!filter_dev) { + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (err < 0) + return err; + } + + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (*idx < cb->args[2]) + goto skip; + if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev != dev) + goto skip; + /* !f->dst is a special case for bridge + * It means the MAC belongs to the bridge + * Therefore need a little more filtering + * we only want to dump the !f->dst case + */ + if (f->dst) + goto skip; + } + if (!filter_dev && f->dst) + goto skip; + + err = fdb_fill_info(skb, br, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI); + if (err < 0) + break; +skip: + *idx += 1; + } + 
rcu_read_unlock(); + + return err; +} + +int br_fdb_get(struct sk_buff *skb, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 vid, u32 portid, u32 seq, + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_fdb_entry *f; + int err = 0; + + rcu_read_lock(); + f = br_fdb_find_rcu(br, addr, vid); + if (!f) { + NL_SET_ERR_MSG(extack, "Fdb entry not found"); + err = -ENOENT; + goto errout; + } + + err = fdb_fill_info(skb, br, f, portid, seq, + RTM_NEWNEIGH, 0); +errout: + rcu_read_unlock(); + return err; +} + +/* returns true if the fdb is modified */ +static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify) +{ + bool modified = false; + + /* allow to mark an entry as inactive, usually done on creation */ + if ((notify & FDB_NOTIFY_INACTIVE_BIT) && + !test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) + modified = true; + + if ((notify & FDB_NOTIFY_BIT) && + !test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) { + /* enabled activity tracking */ + modified = true; + } else if (!(notify & FDB_NOTIFY_BIT) && + test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) { + /* disabled activity tracking, clear notify state */ + clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags); + modified = true; + } + + return modified; +} + +/* Update (create or replace) forwarding database entry */ +static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, + const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid, + struct nlattr *nfea_tb[]) +{ + bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY); + bool refresh = !nfea_tb[NFEA_DONT_REFRESH]; + struct net_bridge_fdb_entry *fdb; + u16 state = ndm->ndm_state; + bool modified = false; + u8 notify = 0; + + /* If the port cannot learn allow only local and static entries */ + if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) && + !(source->state == BR_STATE_LEARNING || + source->state == BR_STATE_FORWARDING)) + return -EPERM; + + if (!source && !(state & NUD_PERMANENT)) { + pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n", + br->dev->name); + return -EINVAL; + } + + if (is_sticky && (state & NUD_PERMANENT)) + return -EINVAL; + + if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) { + notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]); + if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) || + (notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT) + return -EINVAL; + } + + fdb = br_fdb_find(br, addr, vid); + if (fdb == NULL) { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; + + fdb = fdb_create(br, source, addr, vid, 0); + if (!fdb) + return -ENOMEM; + + modified = true; + } else { + if (flags & NLM_F_EXCL) + return -EEXIST; + + if (fdb->dst != source) { + fdb->dst = source; + modified = true; + } + } + + if (fdb_to_nud(br, fdb) != state) { + if (state & NUD_PERMANENT) { + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags)) + fdb_add_hw_addr(br, addr); + } else if (state & NUD_NOARP) { + clear_bit(BR_FDB_LOCAL, &fdb->flags); + if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags)) + fdb_add_hw_addr(br, addr); + } else { + clear_bit(BR_FDB_LOCAL, &fdb->flags); + if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags)) + fdb_del_hw_addr(br, addr); + } + + modified = true; + } + + if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) { + change_bit(BR_FDB_STICKY, &fdb->flags); + modified = true; + } + + if (fdb_handle_notify(fdb, notify)) + modified = true; + + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + + fdb->used = jiffies; + if 
(modified) { + if (refresh) + fdb->updated = jiffies; + fdb_notify(br, fdb, RTM_NEWNEIGH, true); + } + + return 0; +} + +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, + struct net_bridge_port *p, const unsigned char *addr, + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + if (!p) { + pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n", + br->dev->name); + return -EINVAL; + } + if (!nbp_state_should_learn(p)) + return 0; + + local_bh_disable(); + rcu_read_lock(); + br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER)); + rcu_read_unlock(); + local_bh_enable(); + } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } + err = br_fdb_external_learn_add(br, p, addr, vid, true); + } else { + spin_lock_bh(&br->hash_lock); + err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); + spin_unlock_bh(&br->hash_lock); + } + + return err; +} + +static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { + [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 }, + [NFEA_DONT_REFRESH] = { .type = NLA_FLAG }, +}; + +/* Add new permanent fdb entry with RTM_NEWNEIGH */ +int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack) +{ + struct nlattr *nfea_tb[NFEA_MAX + 1], *attr; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan *v; + struct net_bridge *br = NULL; + int err = 0; + + trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags); + + if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { + pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); + return -EINVAL; + } + + if (is_zero_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + br = p->br; + vg = nbp_vlan_group(p); + } + + if (tb[NDA_FDB_EXT_ATTRS]) { + attr = tb[NDA_FDB_EXT_ATTRS]; + err = nla_parse_nested(nfea_tb, NFEA_MAX, attr, + br_nda_fdb_pol, extack); + if (err) + return err; + } else { + memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1)); + } + + if (vid) { + v = br_vlan_find(vg, vid); + if (!v || !br_vlan_should_use(v)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); + } else { + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); + if (err || !vg || !vg->num_vlans) + goto out; + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. 
+ */ + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, + nfea_tb, extack); + if (err) + goto out; + } + } + +out: + return err; +} + +static int fdb_delete_by_addr_and_port(struct net_bridge *br, + const struct net_bridge_port *p, + const u8 *addr, u16 vlan) +{ + struct net_bridge_fdb_entry *fdb; + + fdb = br_fdb_find(br, addr, vlan); + if (!fdb || fdb->dst != p) + return -ENOENT; + + fdb_delete(br, fdb, true); + + return 0; +} + +static int __br_fdb_delete(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&br->hash_lock); + err = fdb_delete_by_addr_and_port(br, p, addr, vid); + spin_unlock_bh(&br->hash_lock); + + return err; +} + +/* Remove neighbor entry with RTM_DELNEIGH */ +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan *v; + struct net_bridge *br; + int err; + + if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + vg = nbp_vlan_group(p); + br = p->br; + } + + if (vid) { + v = br_vlan_find(vg, vid); + if (!v) { + pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); + return -EINVAL; + } + + err = __br_fdb_delete(br, p, addr, vid); + } else { + err = -ENOENT; + err &= __br_fdb_delete(br, p, addr, 0); + if (!vg || !vg->num_vlans) + return err; + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + err &= __br_fdb_delete(br, p, addr, v->vid); + } + } + + return err; +} + +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *f, *tmp; + int err = 0; + + ASSERT_RTNL(); + + /* the key here is that static entries change only under rtnl */ + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!test_bit(BR_FDB_STATIC, &f->flags)) + continue; + err = dev_uc_add(p->dev, f->key.addr.addr); + if (err) + goto rollback; + } +done: + rcu_read_unlock(); + + return err; + +rollback: + hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!test_bit(BR_FDB_STATIC, &tmp->flags)) + continue; + if (tmp == f) + break; + dev_uc_del(p->dev, tmp->key.addr.addr); + } + + goto done; +} + +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *f; + + ASSERT_RTNL(); + + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!test_bit(BR_FDB_STATIC, &f->flags)) + continue; + + dev_uc_del(p->dev, f->key.addr.addr); + } + rcu_read_unlock(); +} + +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid, + bool swdev_notify) +{ + struct net_bridge_fdb_entry *fdb; + bool modified = false; + int err = 0; + + trace_br_fdb_external_learn_add(br, p, addr, vid); + + spin_lock_bh(&br->hash_lock); + + fdb = br_fdb_find(br, addr, vid); + if (!fdb) { + unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN); + + if (swdev_notify) + flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (!p) + flags |= 
BIT(BR_FDB_LOCAL); + + fdb = fdb_create(br, p, addr, vid, flags); + if (!fdb) { + err = -ENOMEM; + goto err_unlock; + } + fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); + } else { + fdb->updated = jiffies; + + if (fdb->dst != p) { + fdb->dst = p; + modified = true; + } + + if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { + /* Refresh entry */ + fdb->used = jiffies; + } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) { + /* Take over SW learned entry */ + set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags); + modified = true; + } + + if (swdev_notify) + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + + if (!p) + set_bit(BR_FDB_LOCAL, &fdb->flags); + + if (modified) + fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); + } + +err_unlock: + spin_unlock_bh(&br->hash_lock); + + return err; +} + +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid, + bool swdev_notify) +{ + struct net_bridge_fdb_entry *fdb; + int err = 0; + + spin_lock_bh(&br->hash_lock); + + fdb = br_fdb_find(br, addr, vid); + if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) + fdb_delete(br, fdb, swdev_notify); + else + err = -ENOENT; + + spin_unlock_bh(&br->hash_lock); + + return err; +} + +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid, bool offloaded) +{ + struct net_bridge_fdb_entry *fdb; + + spin_lock_bh(&br->hash_lock); + + fdb = br_fdb_find(br, addr, vid); + if (fdb && offloaded != test_bit(BR_FDB_OFFLOADED, &fdb->flags)) + change_bit(BR_FDB_OFFLOADED, &fdb->flags); + + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_clear_offload(const struct net_device *dev, u16 vid) +{ + struct net_bridge_fdb_entry *f; + struct net_bridge_port *p; + + ASSERT_RTNL(); + + p = br_port_get_rtnl(dev); + if (!p) + return; + + spin_lock_bh(&p->br->hash_lock); + hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) { + if (f->dst == p && f->key.vlan_id == vid) + clear_bit(BR_FDB_OFFLOADED, &f->flags); + } + spin_unlock_bh(&p->br->hash_lock); +} +EXPORT_SYMBOL_GPL(br_fdb_clear_offload); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c new file mode 100644 index 000000000..f2ef75c7c --- /dev/null +++ b/net/bridge/br_forward.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Forwarding decision + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/netpoll.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/netfilter_bridge.h> +#include "br_private.h" + +/* Don't forward packets to originating port or forwarding disabled */ +static inline int should_deliver(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + struct net_bridge_vlan_group *vg; + + vg = nbp_vlan_group_rcu(p); + return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) && + nbp_switchdev_allowed_egress(p, skb) && + !br_skb_isolated(p, skb); +} + +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + + br_drop_fake_rtable(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD))) { + int depth; + + if (!vlan_get_protocol_and_depth(skb, skb->protocol, 
&depth)) + goto drop; + + skb_set_network_header(skb, depth); + } + + dev_queue_xmit(skb); + + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); + +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + skb->tstamp = 0; + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + br_dev_queue_push_xmit); + +} +EXPORT_SYMBOL_GPL(br_forward_finish); + +static void __br_forward(const struct net_bridge_port *to, + struct sk_buff *skb, bool local_orig) +{ + struct net_bridge_vlan_group *vg; + struct net_device *indev; + struct net *net; + int br_hook; + + vg = nbp_vlan_group_rcu(to); + skb = br_handle_vlan(to->br, to, vg, skb); + if (!skb) + return; + + indev = skb->dev; + skb->dev = to->dev; + if (!local_orig) { + if (skb_warn_if_lro(skb)) { + kfree_skb(skb); + return; + } + br_hook = NF_BR_FORWARD; + skb_forward_csum(skb); + net = dev_net(indev); + } else { + if (unlikely(netpoll_tx_running(to->br->dev))) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) + kfree_skb(skb); + else + br_netpoll_send_skb(to, skb); + return; + } + br_hook = NF_BR_LOCAL_OUT; + net = dev_net(skb->dev); + indev = NULL; + } + + NF_HOOK(NFPROTO_BRIDGE, br_hook, + net, NULL, skb, indev, skb->dev, + br_forward_finish); +} + +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, bool local_orig) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) { + DEV_STATS_INC(dev, tx_dropped); + return -ENOMEM; + } + + __br_forward(prev, skb, local_orig); + return 0; +} + +/** + * br_forward - forward a packet to a specific port + * @to: destination port + * @skb: packet being forwarded + * @local_rcv: packet will be received locally after forwarding + * @local_orig: packet is locally originated + * + * Should be called with rcu_read_lock. 
+ */ +void br_forward(const struct net_bridge_port *to, + struct sk_buff *skb, bool local_rcv, bool local_orig) +{ + if (unlikely(!to)) + goto out; + + /* redirect to backup link if the destination port is down */ + if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) { + struct net_bridge_port *backup_port; + + backup_port = rcu_dereference(to->backup_port); + if (unlikely(!backup_port)) + goto out; + to = backup_port; + } + + if (should_deliver(to, skb)) { + if (local_rcv) + deliver_clone(to, skb, local_orig); + else + __br_forward(to, skb, local_orig); + return; + } + +out: + if (!local_rcv) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(br_forward); + +static struct net_bridge_port *maybe_deliver( + struct net_bridge_port *prev, struct net_bridge_port *p, + struct sk_buff *skb, bool local_orig) +{ + u8 igmp_type = br_multicast_igmp_type(skb); + int err; + + if (!should_deliver(p, skb)) + return prev; + + if (!prev) + goto out; + + err = deliver_clone(prev, skb, local_orig); + if (err) + return ERR_PTR(err); +out: + br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX); + + return p; +} + +/* called under rcu_read_lock */ +void br_flood(struct net_bridge *br, struct sk_buff *skb, + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) +{ + struct net_bridge_port *prev = NULL; + struct net_bridge_port *p; + + list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off, nor + * other traffic if flood off, except for traffic we originate + */ + switch (pkt_type) { + case BR_PKT_UNICAST: + if (!(p->flags & BR_FLOOD)) + continue; + break; + case BR_PKT_MULTICAST: + if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) + continue; + break; + case BR_PKT_BROADCAST: + if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) + continue; + break; + } + + /* Do not flood to ports that enable proxy ARP */ + if (p->flags & BR_PROXYARP) + continue; + if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) && + BR_INPUT_SKB_CB(skb)->proxyarp_replied) + continue; + + prev = maybe_deliver(prev, p, skb, local_orig); + if (IS_ERR(prev)) + goto out; + } + + if (!prev) + goto out; + + if (local_rcv) + deliver_clone(prev, skb, local_orig); + else + __br_forward(prev, skb, local_orig); + return; + +out: + if (!local_rcv) + kfree_skb(skb); +} + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, + const unsigned char *addr, bool local_orig) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + const unsigned char *src = eth_hdr(skb)->h_source; + + if (!should_deliver(p, skb)) + return; + + /* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */ + if (skb->dev == p->dev && ether_addr_equal(src, addr)) + return; + + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) { + DEV_STATS_INC(dev, tx_dropped); + return; + } + + if (!is_broadcast_ether_addr(addr)) + memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); + + __br_forward(p, skb, local_orig); +} + +/* called with rcu_read_lock */ +void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, + bool local_rcv, bool local_orig) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *prev = NULL; + struct net_bridge_port_group *p; + bool allow_mode_include = true; + struct hlist_node *rp; + + rp = rcu_dereference(hlist_first_rcu(&br->router_list)); + if (mdst) { + p = rcu_dereference(mdst->ports); + if 
(br_multicast_should_handle_mode(br, mdst->addr.proto) && + br_multicast_is_star_g(&mdst->addr)) + allow_mode_include = false; + } else { + p = NULL; + } + + while (p || rp) { + struct net_bridge_port *port, *lport, *rport; + + lport = p ? p->key.port : NULL; + rport = hlist_entry_safe(rp, struct net_bridge_port, rlist); + + if ((unsigned long)lport > (unsigned long)rport) { + port = lport; + + if (port->flags & BR_MULTICAST_TO_UNICAST) { + maybe_deliver_addr(lport, skb, p->eth_addr, + local_orig); + goto delivered; + } + if ((!allow_mode_include && + p->filter_mode == MCAST_INCLUDE) || + (p->flags & MDB_PG_FLAGS_BLOCKED)) + goto delivered; + } else { + port = rport; + } + + prev = maybe_deliver(prev, port, skb, local_orig); + if (IS_ERR(prev)) + goto out; +delivered: + if ((unsigned long)lport >= (unsigned long)port) + p = rcu_dereference(p->next); + if ((unsigned long)rport >= (unsigned long)port) + rp = rcu_dereference(hlist_next_rcu(rp)); + } + + if (!prev) + goto out; + + if (local_rcv) + deliver_clone(prev, skb, local_orig); + else + __br_forward(prev, skb, local_orig); + return; + +out: + if (!local_rcv) + kfree_skb(skb); +} +#endif diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c new file mode 100644 index 000000000..e35488fde --- /dev/null +++ b/net/bridge/br_if.c @@ -0,0 +1,793 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Userspace interface + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/netpoll.h> +#include <linux/ethtool.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rtnetlink.h> +#include <linux/if_ether.h> +#include <linux/slab.h> +#include <net/dsa.h> +#include <net/sock.h> +#include <linux/if_vlan.h> +#include <net/switchdev.h> +#include <net/net_namespace.h> + +#include "br_private.h" + +/* + * Determine initial path cost based on speed. + * using recommendations from 802.1d standard + * + * Since driver might sleep need to not be holding any locks. + */ +static int port_cost(struct net_device *dev) +{ + struct ethtool_link_ksettings ecmd; + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) { + switch (ecmd.base.speed) { + case SPEED_10000: + return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; + case SPEED_10: + return 100; + } + } + + /* Old silly heuristics based on name */ + if (!strncmp(dev->name, "lec", 3)) + return 7; + + if (!strncmp(dev->name, "plip", 4)) + return 2500; + + return 100; /* assume old 10Mbps */ +} + + +/* Check for port carrier transitions. 
*/ +void br_port_carrier_check(struct net_bridge_port *p, bool *notified) +{ + struct net_device *dev = p->dev; + struct net_bridge *br = p->br; + + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) + p->path_cost = port_cost(dev); + + *notified = false; + if (!netif_running(br->dev)) + return; + + spin_lock_bh(&br->lock); + if (netif_running(dev) && netif_oper_up(dev)) { + if (p->state == BR_STATE_DISABLED) { + br_stp_enable_port(p); + *notified = true; + } + } else { + if (p->state != BR_STATE_DISABLED) { + br_stp_disable_port(p); + *notified = true; + } + } + spin_unlock_bh(&br->lock); +} + +static void br_port_set_promisc(struct net_bridge_port *p) +{ + int err = 0; + + if (br_promisc_port(p)) + return; + + err = dev_set_promiscuity(p->dev, 1); + if (err) + return; + + br_fdb_unsync_static(p->br, p); + p->flags |= BR_PROMISC; +} + +static void br_port_clear_promisc(struct net_bridge_port *p) +{ + int err; + + /* Check if the port is already non-promisc or if it doesn't + * support UNICAST filtering. Without unicast filtering support + * we'll end up re-enabling promisc mode anyway, so just check for + * it here. + */ + if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT)) + return; + + /* Since we'll be clearing the promisc mode, program the port + * first so that we don't have interruption in traffic. + */ + err = br_fdb_sync_static(p->br, p); + if (err) + return; + + dev_set_promiscuity(p->dev, -1); + p->flags &= ~BR_PROMISC; +} + +/* When a port is added or removed or when certain port flags + * change, this function is called to automatically manage + * promiscuity setting of all the bridge ports. We are always called + * under RTNL so can skip using rcu primitives. + */ +void br_manage_promisc(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool set_all = false; + + /* If vlan filtering is disabled or bridge interface is placed + * into promiscuous mode, place all ports in promiscuous mode. + */ + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev)) + set_all = true; + + list_for_each_entry(p, &br->port_list, list) { + if (set_all) { + br_port_set_promisc(p); + } else { + /* If the number of auto-ports is <= 1, then all other + * ports will have their output configuration + * statically specified through fdbs. Since ingress + * on the auto-port becomes forwarding/egress to other + * ports and egress configuration is statically known, + * we can say that ingress configuration of the + * auto-port is also statically known. + * This lets us disable promiscuous mode and write + * this config to hw. 
+ */ + if ((p->dev->priv_flags & IFF_UNICAST_FLT) && + (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p)))) + br_port_clear_promisc(p); + else + br_port_set_promisc(p); + } + } +} + +int nbp_backup_change(struct net_bridge_port *p, + struct net_device *backup_dev) +{ + struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port); + struct net_bridge_port *backup_p = NULL; + + ASSERT_RTNL(); + + if (backup_dev) { + if (!netif_is_bridge_port(backup_dev)) + return -ENOENT; + + backup_p = br_port_get_rtnl(backup_dev); + if (backup_p->br != p->br) + return -EINVAL; + } + + if (p == backup_p) + return -EINVAL; + + if (old_backup == backup_p) + return 0; + + /* if the backup link is already set, clear it */ + if (old_backup) + old_backup->backup_redirected_cnt--; + + if (backup_p) + backup_p->backup_redirected_cnt++; + rcu_assign_pointer(p->backup_port, backup_p); + + return 0; +} + +static void nbp_backup_clear(struct net_bridge_port *p) +{ + nbp_backup_change(p, NULL); + if (p->backup_redirected_cnt) { + struct net_bridge_port *cur_p; + + list_for_each_entry(cur_p, &p->br->port_list, list) { + struct net_bridge_port *backup_p; + + backup_p = rtnl_dereference(cur_p->backup_port); + if (backup_p == p) + nbp_backup_change(cur_p, NULL); + } + } + + WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt); +} + +static void nbp_update_port_count(struct net_bridge *br) +{ + struct net_bridge_port *p; + u32 cnt = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (br_auto_port(p)) + cnt++; + } + if (br->auto_cnt != cnt) { + br->auto_cnt = cnt; + br_manage_promisc(br); + } +} + +static void nbp_delete_promisc(struct net_bridge_port *p) +{ + /* If port is currently promiscuous, unset promiscuity. + * Otherwise, it is a static port so remove all addresses + * from it. + */ + dev_set_allmulti(p->dev, -1); + if (br_promisc_port(p)) + dev_set_promiscuity(p->dev, -1); + else + br_fdb_unsync_static(p->br, p); +} + +static void release_nbp(struct kobject *kobj) +{ + struct net_bridge_port *p + = container_of(kobj, struct net_bridge_port, kobj); + kfree(p); +} + +static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + struct net_bridge_port *p = kobj_to_brport(kobj); + + net_ns_get_ownership(dev_net(p->dev), uid, gid); +} + +static struct kobj_type brport_ktype = { +#ifdef CONFIG_SYSFS + .sysfs_ops = &brport_sysfs_ops, +#endif + .release = release_nbp, + .get_ownership = brport_get_ownership, +}; + +static void destroy_nbp(struct net_bridge_port *p) +{ + struct net_device *dev = p->dev; + + p->br = NULL; + p->dev = NULL; + dev_put(dev); + + kobject_put(&p->kobj); +} + +static void destroy_nbp_rcu(struct rcu_head *head) +{ + struct net_bridge_port *p = + container_of(head, struct net_bridge_port, rcu); + destroy_nbp(p); +} + +static unsigned get_max_headroom(struct net_bridge *br) +{ + unsigned max_headroom = 0; + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + unsigned dev_headroom = netdev_get_fwd_headroom(p->dev); + + if (dev_headroom > max_headroom) + max_headroom = dev_headroom; + } + + return max_headroom; +} + +static void update_headroom(struct net_bridge *br, int new_hr) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) + netdev_set_rx_headroom(p->dev, new_hr); + + br->dev->needed_headroom = new_hr; +} + +/* Delete port(interface) from bridge is done in two steps. + * via RCU. First step, marks device as down. 
That deletes + * all the timers and stops new packets from flowing through. + * + * Final cleanup doesn't occur until after all CPU's finished + * processing packets. + * + * Protected from multiple admin operations by RTNL mutex + */ +static void del_nbp(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + struct net_device *dev = p->dev; + + sysfs_remove_link(br->ifobj, p->dev->name); + + nbp_delete_promisc(p); + + spin_lock_bh(&br->lock); + br_stp_disable_port(p); + spin_unlock_bh(&br->lock); + + br_mrp_port_del(br, p); + + br_ifinfo_notify(RTM_DELLINK, NULL, p); + + list_del_rcu(&p->list); + if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom) + update_headroom(br, get_max_headroom(br)); + netdev_reset_rx_headroom(dev); + + nbp_vlan_flush(p); + br_fdb_delete_by_port(br, p, 0, 1); + switchdev_deferred_process(); + nbp_backup_clear(p); + + nbp_update_port_count(br); + + netdev_upper_dev_unlink(dev, br->dev); + + dev->priv_flags &= ~IFF_BRIDGE_PORT; + + netdev_rx_handler_unregister(dev); + + br_multicast_del_port(p); + + kobject_uevent(&p->kobj, KOBJ_REMOVE); + kobject_del(&p->kobj); + + br_netpoll_disable(p); + + call_rcu(&p->rcu, destroy_nbp_rcu); +} + +/* Delete bridge device */ +void br_dev_delete(struct net_device *dev, struct list_head *head) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p, *n; + + list_for_each_entry_safe(p, n, &br->port_list, list) { + del_nbp(p); + } + + br_recalculate_neigh_suppress_enabled(br); + + br_fdb_delete_by_port(br, NULL, 0, 1); + + cancel_delayed_work_sync(&br->gc_work); + + br_sysfs_delbr(br->dev); + unregister_netdevice_queue(br->dev, head); +} + +/* find an available port number */ +static int find_portno(struct net_bridge *br) +{ + int index; + struct net_bridge_port *p; + unsigned long *inuse; + + inuse = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + set_bit(0, inuse); /* zero is reserved */ + list_for_each_entry(p, &br->port_list, list) { + set_bit(p->port_no, inuse); + } + index = find_first_zero_bit(inuse, BR_MAX_PORTS); + bitmap_free(inuse); + + return (index >= BR_MAX_PORTS) ? 
-EXFULL : index; +} + +/* called with RTNL but without bridge lock */ +static struct net_bridge_port *new_nbp(struct net_bridge *br, + struct net_device *dev) +{ + struct net_bridge_port *p; + int index, err; + + index = find_portno(br); + if (index < 0) + return ERR_PTR(index); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + + p->br = br; + dev_hold(dev); + p->dev = dev; + p->path_cost = port_cost(dev); + p->priority = 0x8000 >> BR_PORT_BITS; + p->port_no = index; + p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; + br_init_port(p); + br_set_state(p, BR_STATE_DISABLED); + br_stp_port_timer_init(p); + err = br_multicast_add_port(p); + if (err) { + dev_put(dev); + kfree(p); + p = ERR_PTR(err); + } + + return p; +} + +int br_add_bridge(struct net *net, const char *name) +{ + struct net_device *dev; + int res; + + dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN, + br_dev_setup); + + if (!dev) + return -ENOMEM; + + dev_net_set(dev, net); + dev->rtnl_link_ops = &br_link_ops; + + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; +} + +int br_del_bridge(struct net *net, const char *name) +{ + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + dev = __dev_get_by_name(net, name); + if (dev == NULL) + ret = -ENXIO; /* Could not find device */ + + else if (!(dev->priv_flags & IFF_EBRIDGE)) { + /* Attempt to delete non bridge device! */ + ret = -EPERM; + } + + else if (dev->flags & IFF_UP) { + /* Not shutdown yet. */ + ret = -EBUSY; + } + + else + br_dev_delete(dev, NULL); + + rtnl_unlock(); + return ret; +} + +/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */ +static int br_mtu_min(const struct net_bridge *br) +{ + const struct net_bridge_port *p; + int ret_mtu = 0; + + list_for_each_entry(p, &br->port_list, list) + if (!ret_mtu || ret_mtu > p->dev->mtu) + ret_mtu = p->dev->mtu; + + return ret_mtu ? 
ret_mtu : ETH_DATA_LEN; +} + +void br_mtu_auto_adjust(struct net_bridge *br) +{ + ASSERT_RTNL(); + + /* if the bridge MTU was manually configured don't mess with it */ + if (br_opt_get(br, BROPT_MTU_SET_BY_USER)) + return; + + /* change to the minimum MTU and clear the flag which was set by + * the bridge ndo_change_mtu callback + */ + dev_set_mtu(br->dev, br_mtu_min(br)); + br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false); +} + +static void br_set_gso_limits(struct net_bridge *br) +{ + unsigned int gso_max_size = GSO_MAX_SIZE; + u16 gso_max_segs = GSO_MAX_SEGS; + const struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + gso_max_size = min(gso_max_size, p->dev->gso_max_size); + gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs); + } + br->dev->gso_max_size = gso_max_size; + br->dev->gso_max_segs = gso_max_segs; +} + +/* + * Recomputes features using slave's features + */ +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features) +{ + struct net_bridge_port *p; + netdev_features_t mask; + + if (list_empty(&br->port_list)) + return features; + + mask = features; + features &= ~NETIF_F_ONE_FOR_ALL; + + list_for_each_entry(p, &br->port_list, list) { + features = netdev_increment_features(features, + p->dev->features, mask); + } + features = netdev_add_tso_features(features, mask); + + return features; +} + +/* called with RTNL */ +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct net_bridge_port *p; + int err = 0; + unsigned br_hr, dev_hr; + bool changed_addr, fdb_synced = false; + + /* Don't allow bridging non-ethernet like devices. */ + if ((dev->flags & IFF_LOOPBACK) || + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || + !is_valid_ether_addr(dev->dev_addr)) + return -EINVAL; + + /* Also don't allow bridging of net devices that are DSA masters, since + * the bridge layer rx_handler prevents the DSA fake ethertype handler + * to be invoked, so we don't get the chance to strip off and parse the + * DSA switch tag protocol header (the bridge layer just returns + * RX_HANDLER_CONSUMED, stopping RX processing for these frames). + * The only case where that would not be an issue is when bridging can + * already be offloaded, such as when the DSA master is itself a DSA + * or plain switchdev port, and is bridged only with other ports from + * the same hardware device. + */ + if (netdev_uses_dsa(dev)) { + list_for_each_entry(p, &br->port_list, list) { + if (!netdev_port_same_parent_id(dev, p->dev)) { + NL_SET_ERR_MSG(extack, + "Cannot do software bridging with a DSA master"); + return -EINVAL; + } + } + } + + /* No bridging of bridges */ + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) { + NL_SET_ERR_MSG(extack, + "Can not enslave a bridge to a bridge"); + return -ELOOP; + } + + /* Device has master upper dev */ + if (netdev_master_upper_dev_get(dev)) + return -EBUSY; + + /* No bridging devices that dislike that (e.g. 
wireless) */ + if (dev->priv_flags & IFF_DONT_BRIDGE) { + NL_SET_ERR_MSG(extack, + "Device does not allow enslaving to a bridge"); + return -EOPNOTSUPP; + } + + p = new_nbp(br, dev); + if (IS_ERR(p)) + return PTR_ERR(p); + + call_netdevice_notifiers(NETDEV_JOIN, dev); + + err = dev_set_allmulti(dev, 1); + if (err) { + br_multicast_del_port(p); + kfree(p); /* kobject not yet init'd, manually free */ + goto err1; + } + + err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), + SYSFS_BRIDGE_PORT_ATTR); + if (err) + goto err2; + + err = br_sysfs_addif(p); + if (err) + goto err2; + + err = br_netpoll_enable(p); + if (err) + goto err3; + + err = netdev_rx_handler_register(dev, br_get_rx_handler(dev), p); + if (err) + goto err4; + + dev->priv_flags |= IFF_BRIDGE_PORT; + + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack); + if (err) + goto err5; + + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + + dev_disable_lro(dev); + + list_add_rcu(&p->list, &br->port_list); + + nbp_update_port_count(br); + if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) { + /* When updating the port count we also update all ports' + * promiscuous mode. + * A port leaving promiscuous mode normally gets the bridge's + * fdb synced to the unicast filter (if supported), however, + * `br_port_clear_promisc` does not distinguish between + * non-promiscuous ports and *new* ports, so we need to + * sync explicitly here. + */ + fdb_synced = br_fdb_sync_static(br, p) == 0; + if (!fdb_synced) + netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n"); + } + + netdev_update_features(br->dev); + + br_hr = br->dev->needed_headroom; + dev_hr = netdev_get_fwd_headroom(dev); + if (br_hr < dev_hr) + update_headroom(br, dev_hr); + else + netdev_set_rx_headroom(dev, br_hr); + + if (br_fdb_insert(br, p, dev->dev_addr, 0)) + netdev_err(dev, "failed insert local address bridge forwarding table\n"); + + if (br->dev->addr_assign_type != NET_ADDR_SET) { + /* Ask for permission to use this MAC address now, even if we + * don't end up choosing it below. 
+ */ + err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack); + if (err) + goto err7; + } + + err = nbp_vlan_init(p, extack); + if (err) { + netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + goto err7; + } + + spin_lock_bh(&br->lock); + changed_addr = br_stp_recalculate_bridge_id(br); + + if (netif_running(dev) && netif_oper_up(dev) && + (br->dev->flags & IFF_UP)) + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + br_mtu_auto_adjust(br); + br_set_gso_limits(br); + + kobject_uevent(&p->kobj, KOBJ_ADD); + + return 0; + +err7: + if (fdb_synced) + br_fdb_unsync_static(br, p); + list_del_rcu(&p->list); + br_fdb_delete_by_port(br, p, 0, 1); + nbp_update_port_count(br); +err6: + netdev_upper_dev_unlink(dev, br->dev); +err5: + dev->priv_flags &= ~IFF_BRIDGE_PORT; + netdev_rx_handler_unregister(dev); +err4: + br_netpoll_disable(p); +err3: + sysfs_remove_link(br->ifobj, p->dev->name); +err2: + br_multicast_del_port(p); + kobject_put(&p->kobj); + dev_set_allmulti(dev, -1); +err1: + dev_put(dev); + return err; +} + +/* called with RTNL */ +int br_del_if(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + bool changed_addr; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br) + return -EINVAL; + + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ + del_nbp(p); + + br_mtu_auto_adjust(br); + br_set_gso_limits(br); + + spin_lock_bh(&br->lock); + changed_addr = br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + netdev_update_features(br->dev); + + return 0; +} + +void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) +{ + struct net_bridge *br = p->br; + + if (mask & BR_AUTO_MASK) + nbp_update_port_count(br); + + if (mask & BR_NEIGH_SUPPRESS) + br_recalculate_neigh_suppress_enabled(br); +} + +bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag) +{ + struct net_bridge_port *p; + + p = br_port_get_rtnl_rcu(dev); + if (!p) + return false; + + return p->flags & flag; +} +EXPORT_SYMBOL_GPL(br_port_flag_is_set); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c new file mode 100644 index 000000000..52dd0708f --- /dev/null +++ b/net/bridge/br_input.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handle incoming frames + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/netfilter_bridge.h> +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE +#include <net/netfilter/nf_queue.h> +#endif +#include <linux/neighbour.h> +#include <net/arp.h> +#include <net/dsa.h> +#include <linux/export.h> +#include <linux/rculist.h> +#include "br_private.h" +#include "br_private_tunnel.h" + +static int +br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + br_drop_fake_rtable(skb); + return netif_receive_skb(skb); +} + +static int br_pass_frame_up(struct sk_buff *skb) +{ + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct net_bridge_vlan_group *vg; + struct pcpu_sw_netstats *brstats = 
this_cpu_ptr(br->stats); + + u64_stats_update_begin(&brstats->syncp); + brstats->rx_packets++; + brstats->rx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); + + vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc modue when someone + * may be running packet capture. + */ + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(vg, skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + indev = skb->dev; + skb->dev = brdev; + skb = br_handle_vlan(br, NULL, vg, skb); + if (!skb) + return NET_RX_DROP; + /* update the multicast stats if the packet is IGMP/MLD */ + br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), + BR_MCAST_DIR_TX); + + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, + br_netif_receive_skb); +} + +/* note: already called with rcu_read_lock */ +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + enum br_pkt_type pkt_type = BR_PKT_UNICAST; + struct net_bridge_fdb_entry *dst = NULL; + struct net_bridge_mdb_entry *mdst; + bool local_rcv, mcast_hit = false; + struct net_bridge *br; + u16 vid = 0; + u8 state; + + if (!p || p->state == BR_STATE_DISABLED) + goto drop; + + state = p->state; + if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid, + &state)) + goto out; + + nbp_switchdev_frame_mark(p, skb); + + /* insert into forwarding database after filtering to avoid spoofing */ + br = p->br; + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); + + local_rcv = !!(br->dev->flags & IFF_PROMISC); + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { + /* by definition the broadcast is also a multicast address */ + if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { + pkt_type = BR_PKT_BROADCAST; + local_rcv = true; + } else { + pkt_type = BR_PKT_MULTICAST; + if (br_multicast_rcv(br, p, skb, vid)) + goto drop; + } + } + + if (state == BR_STATE_LEARNING) + goto drop; + + BR_INPUT_SKB_CB(skb)->brdev = br->dev; + BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED); + + if (IS_ENABLED(CONFIG_INET) && + (skb->protocol == htons(ETH_P_ARP) || + skb->protocol == htons(ETH_P_RARP))) { + br_do_proxy_suppress_arp(skb, br, vid, p); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, p, msg); + } + + switch (pkt_type) { + case BR_PKT_MULTICAST: + mdst = br_mdb_get(br, skb, vid); + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br, eth_hdr(skb))) { + if ((mdst && mdst->host_joined) || + br_multicast_is_router(br)) { + local_rcv = true; + DEV_STATS_INC(br->dev, multicast); + } + mcast_hit = true; + } else { + local_rcv = true; + DEV_STATS_INC(br->dev, multicast); + } + break; + case BR_PKT_UNICAST: + dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid); + default: + break; + } + + if (dst) { + unsigned long now = jiffies; + + if (test_bit(BR_FDB_LOCAL, 
&dst->flags)) + return br_pass_frame_up(skb); + + if (now != dst->used) + dst->used = now; + br_forward(dst->dst, skb, local_rcv, false); + } else { + if (!mcast_hit) + br_flood(br, skb, pkt_type, local_rcv, false); + else + br_multicast_flood(mdst, skb, local_rcv, false); + } + + if (local_rcv) + return br_pass_frame_up(skb); + +out: + return 0; +drop: + kfree_skb(skb); + goto out; +} +EXPORT_SYMBOL_GPL(br_handle_frame_finish); + +static void __br_handle_local_finish(struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; + + /* check if vlan is allowed, to avoid spoofing */ + if ((p->flags & BR_LEARNING) && + nbp_state_should_learn(p) && + !br_opt_get(p->br, BROPT_NO_LL_LEARN) && + br_should_learn(p, skb, &vid)) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, 0); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + __br_handle_local_finish(skb); + + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; +} + +static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) +{ +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries *e = NULL; + struct nf_hook_state state; + unsigned int verdict, i; + struct net *net; + int ret; + + net = dev_net(skb->dev); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING])) + goto frame_finish; +#endif + + e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]); + if (!e) + goto frame_finish; + + nf_hook_state_init(&state, NF_BR_PRE_ROUTING, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + net, br_handle_frame_finish); + + for (i = 0; i < e->num_hook_entries; i++) { + verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) { + *pskb = skb; + return RX_HANDLER_PASS; + } + break; + case NF_DROP: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + case NF_QUEUE: + ret = nf_queue(skb, &state, i, verdict); + if (ret == 1) + continue; + return RX_HANDLER_CONSUMED; + default: /* STOLEN */ + return RX_HANDLER_CONSUMED; + } + } +frame_finish: + net = dev_net(skb->dev); + br_handle_frame_finish(net, NULL, skb); +#else + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); +#endif + return RX_HANDLER_CONSUMED; +} + +/* + * Return NULL if skb is handled + * note: already called with rcu_read_lock + */ +static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) +{ + struct net_bridge_port *p; + struct sk_buff *skb = *pskb; + const unsigned char *dest = eth_hdr(skb)->h_dest; + + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; + + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + goto drop; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return RX_HANDLER_CONSUMED; + + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + + p = br_port_get_rcu(skb->dev); + if (p->flags & BR_VLAN_TUNNEL) { + if (br_handle_ingress_vlan_tunnel(skb, p, + nbp_vlan_group_rcu(p))) + goto drop; + } + + if (unlikely(is_link_local_ether_addr(dest))) { + u16 fwd_mask = p->br->group_fwd_mask_required; + + /* + * See IEEE 802.1D Table 7-10 Reserved addresses + * + * Assignment Value + * Bridge Group Address 01-80-C2-00-00-00 + * (MAC Control) 802.3 01-80-C2-00-00-01 + * (Link Aggregation) 802.3 01-80-C2-00-00-02 + * 802.1X PAE address 01-80-C2-00-00-03 + * + * 802.1AB LLDP 01-80-C2-00-00-0E + * + * Others reserved for 
future standardization + */ + fwd_mask |= p->group_fwd_mask; + switch (dest[5]) { + case 0x00: /* Bridge Group Address */ + /* If STP is turned off, + then must forward to keep loop detection */ + if (p->br->stp_enabled == BR_NO_STP || + fwd_mask & (1u << dest[5])) + goto forward; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; + + case 0x01: /* IEEE MAC (Pause) */ + goto drop; + + case 0x0E: /* 802.1AB LLDP */ + fwd_mask |= p->br->group_fwd_mask; + if (fwd_mask & (1u << dest[5])) + goto forward; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; + + default: + /* Allow selective forwarding for most other protocols */ + fwd_mask |= p->br->group_fwd_mask; + if (fwd_mask & (1u << dest[5])) + goto forward; + } + + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } + } + + if (unlikely(br_mrp_process(p, skb))) + return RX_HANDLER_PASS; + +forward: + switch (p->state) { + case BR_STATE_FORWARDING: + case BR_STATE_LEARNING: + if (ether_addr_equal(p->br->dev->dev_addr, dest)) + skb->pkt_type = PACKET_HOST; + + return nf_hook_bridge_pre(skb, pskb); + default: +drop: + kfree_skb(skb); + } + return RX_HANDLER_CONSUMED; +} + +/* This function has no purpose other than to appease the br_port_get_rcu/rtnl + * helpers which identify bridged ports according to the rx_handler installed + * on them (so there _needs_ to be a bridge rx_handler even if we don't need it + * to do anything useful). This bridge won't support traffic to/from the stack, + * but only hardware bridging. So return RX_HANDLER_PASS so we don't steal + * frames from the ETH_P_XDSA packet_type handler. 
+ */ +static rx_handler_result_t br_handle_frame_dummy(struct sk_buff **pskb) +{ + return RX_HANDLER_PASS; +} + +rx_handler_func_t *br_get_rx_handler(const struct net_device *dev) +{ + if (netdev_uses_dsa(dev)) + return br_handle_frame_dummy; + + return br_handle_frame; +} diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c new file mode 100644 index 000000000..2db800fc2 --- /dev/null +++ b/net/bridge/br_ioctl.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Ioctl handler + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/if_bridge.h> +#include <linux/netdevice.h> +#include <linux/slab.h> +#include <linux/times.h> +#include <net/net_namespace.h> +#include <linux/uaccess.h> +#include "br_private.h" + +static int get_bridge_ifindices(struct net *net, int *indices, int num) +{ + struct net_device *dev; + int i = 0; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (i >= num) + break; + if (dev->priv_flags & IFF_EBRIDGE) + indices[i++] = dev->ifindex; + } + rcu_read_unlock(); + + return i; +} + +/* called with RTNL */ +static void get_port_ifindices(struct net_bridge *br, int *ifindices, int num) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->port_no < num) + ifindices[p->port_no] = p->dev->ifindex; + } +} + +/* + * Format up to a page worth of forwarding table entries + * userbuf -- where to copy result + * maxnum -- maximum number of entries desired + * (limited to a page for sanity) + * offset -- number of records to skip + */ +static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, + unsigned long maxnum, unsigned long offset) +{ + int num; + void *buf; + size_t size; + + /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */ + if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry)) + maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); + + size = maxnum * sizeof(struct __fdb_entry); + + buf = kmalloc(size, GFP_USER); + if (!buf) + return -ENOMEM; + + num = br_fdb_fillbuf(br, buf, maxnum, offset); + if (num > 0) { + if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry))) + num = -EFAULT; + } + kfree(buf); + + return num; +} + +/* called with RTNL */ +static int add_del_if(struct net_bridge *br, int ifindex, int isadd) +{ + struct net *net = dev_net(br->dev); + struct net_device *dev; + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + return -EINVAL; + + if (isadd) + ret = br_add_if(br, dev, NULL); + else + ret = br_del_if(br, dev); + + return ret; +} + +/* + * Legacy ioctl's through SIOCDEVPRIVATE + * This interface is deprecated because it was too difficult + * to do the translation for 32/64bit ioctl compatibility. 
+ */ +static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; + unsigned long args[4]; + int ret = -EOPNOTSUPP; + + if (copy_from_user(args, rq->ifr_data, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_ADD_IF: + case BRCTL_DEL_IF: + return add_del_if(br, args[1], args[0] == BRCTL_ADD_IF); + + case BRCTL_GET_BRIDGE_INFO: + { + struct __bridge_info b; + + memset(&b, 0, sizeof(struct __bridge_info)); + rcu_read_lock(); + memcpy(&b.designated_root, &br->designated_root, 8); + memcpy(&b.bridge_id, &br->bridge_id, 8); + b.root_path_cost = br->root_path_cost; + b.max_age = jiffies_to_clock_t(br->max_age); + b.hello_time = jiffies_to_clock_t(br->hello_time); + b.forward_delay = br->forward_delay; + b.bridge_max_age = br->bridge_max_age; + b.bridge_hello_time = br->bridge_hello_time; + b.bridge_forward_delay = jiffies_to_clock_t(br->bridge_forward_delay); + b.topology_change = br->topology_change; + b.topology_change_detected = br->topology_change_detected; + b.root_port = br->root_port; + + b.stp_enabled = (br->stp_enabled != BR_NO_STP); + b.ageing_time = jiffies_to_clock_t(br->ageing_time); + b.hello_timer_value = br_timer_value(&br->hello_timer); + b.tcn_timer_value = br_timer_value(&br->tcn_timer); + b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); + b.gc_timer_value = br_timer_value(&br->gc_work.timer); + rcu_read_unlock(); + + if (copy_to_user((void __user *)args[1], &b, sizeof(b))) + return -EFAULT; + + return 0; + } + + case BRCTL_GET_PORT_LIST: + { + int num, *indices; + + num = args[2]; + if (num < 0) + return -EINVAL; + if (num == 0) + num = 256; + if (num > BR_MAX_PORTS) + num = BR_MAX_PORTS; + + indices = kcalloc(num, sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + get_port_ifindices(br, indices, num); + if (copy_to_user((void __user *)args[1], indices, num*sizeof(int))) + num = -EFAULT; + kfree(indices); + return num; + } + + case BRCTL_SET_BRIDGE_FORWARD_DELAY: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = br_set_forward_delay(br, args[1]); + break; + + case BRCTL_SET_BRIDGE_HELLO_TIME: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = br_set_hello_time(br, args[1]); + break; + + case BRCTL_SET_BRIDGE_MAX_AGE: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = br_set_max_age(br, args[1]); + break; + + case BRCTL_SET_AGEING_TIME: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = br_set_ageing_time(br, args[1]); + break; + + case BRCTL_GET_PORT_INFO: + { + struct __port_info p; + struct net_bridge_port *pt; + + rcu_read_lock(); + if ((pt = br_get_port(br, args[2])) == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + memset(&p, 0, sizeof(struct __port_info)); + memcpy(&p.designated_root, &pt->designated_root, 8); + memcpy(&p.designated_bridge, &pt->designated_bridge, 8); + p.port_id = pt->port_id; + p.designated_port = pt->designated_port; + p.path_cost = pt->path_cost; + p.designated_cost = pt->designated_cost; + p.state = pt->state; + p.top_change_ack = pt->topology_change_ack; + p.config_pending = pt->config_pending; + p.message_age_timer_value = br_timer_value(&pt->message_age_timer); + p.forward_delay_timer_value = br_timer_value(&pt->forward_delay_timer); + p.hold_timer_value = br_timer_value(&pt->hold_timer); + + rcu_read_unlock(); + + if (copy_to_user((void __user 
*)args[1], &p, sizeof(p))) + return -EFAULT; + + return 0; + } + + case BRCTL_SET_BRIDGE_STP_STATE: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = br_stp_set_enabled(br, args[1], NULL); + break; + + case BRCTL_SET_BRIDGE_PRIORITY: + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + br_stp_set_bridge_priority(br, args[1]); + ret = 0; + break; + + case BRCTL_SET_PORT_PRIORITY: + { + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + if ((p = br_get_port(br, args[1])) == NULL) + ret = -EINVAL; + else + ret = br_stp_set_port_priority(p, args[2]); + spin_unlock_bh(&br->lock); + break; + } + + case BRCTL_SET_PATH_COST: + { + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + if ((p = br_get_port(br, args[1])) == NULL) + ret = -EINVAL; + else + ret = br_stp_set_path_cost(p, args[2]); + spin_unlock_bh(&br->lock); + break; + } + + case BRCTL_GET_FDB_ENTRIES: + return get_fdb_entries(br, (void __user *)args[1], + args[2], args[3]); + } + + if (!ret) { + if (p) + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + else + netdev_state_change(br->dev); + } + + return ret; +} + +static int old_deviceless(struct net *net, void __user *uarg) +{ + unsigned long args[3]; + + if (copy_from_user(args, uarg, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_GET_VERSION: + return BRCTL_VERSION; + + case BRCTL_GET_BRIDGES: + { + int *indices; + int ret = 0; + + if (args[2] >= 2048) + return -ENOMEM; + indices = kcalloc(args[2], sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + args[2] = get_bridge_ifindices(net, indices, args[2]); + + ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) + ? 
-EFAULT : args[2]; + + kfree(indices); + return ret; + } + + case BRCTL_ADD_BRIDGE: + case BRCTL_DEL_BRIDGE: + { + char buf[IFNAMSIZ]; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ)) + return -EFAULT; + + buf[IFNAMSIZ-1] = 0; + + if (args[0] == BRCTL_ADD_BRIDGE) + return br_add_bridge(net, buf); + + return br_del_bridge(net, buf); + } + } + + return -EOPNOTSUPP; +} + +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) +{ + switch (cmd) { + case SIOCGIFBR: + case SIOCSIFBR: + return old_deviceless(net, uarg); + + case SIOCBRADDBR: + case SIOCBRDELBR: + { + char buf[IFNAMSIZ]; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(buf, uarg, IFNAMSIZ)) + return -EFAULT; + + buf[IFNAMSIZ-1] = 0; + if (cmd == SIOCBRADDBR) + return br_add_bridge(net, buf); + + return br_del_bridge(net, buf); + } + } + return -EOPNOTSUPP; +} + +int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct net_bridge *br = netdev_priv(dev); + + switch (cmd) { + case SIOCDEVPRIVATE: + return old_dev_ioctl(dev, rq, cmd); + + case SIOCBRADDIF: + case SIOCBRDELIF: + return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); + + } + + br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd); + return -EOPNOTSUPP; +} diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c new file mode 100644 index 000000000..e15bab19a --- /dev/null +++ b/net/bridge/br_mdb.c @@ -0,0 +1,1114 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/err.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <net/netlink.h> +#include <net/switchdev.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" + +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + struct nlattr *nest, *port_nest; + + if (!br->multicast_router || hlist_empty(&br->router_list)) + return 0; + + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { + if (!p) + continue; + port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); + if (!port_nest) + goto fail; + if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) || + nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER, + br_timer_value(&p->multicast_router_timer)) || + nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE, + p->multicast_router)) { + nla_nest_cancel(skb, port_nest); + goto fail; + } + nla_nest_end(skb, port_nest); + } + + nla_nest_end(skb, nest); + return 0; +fail: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) +{ + e->state = flags & MDB_PG_FLAGS_PERMANENT; + e->flags = 0; + if (flags & MDB_PG_FLAGS_OFFLOAD) + e->flags |= MDB_FLAGS_OFFLOAD; + if (flags & MDB_PG_FLAGS_FAST_LEAVE) + e->flags |= MDB_FLAGS_FAST_LEAVE; + if (flags & MDB_PG_FLAGS_STAR_EXCL) + e->flags |= MDB_FLAGS_STAR_EXCL; + if (flags & MDB_PG_FLAGS_BLOCKED) + e->flags |= MDB_FLAGS_BLOCKED; +} + +static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip, + struct nlattr **mdb_attrs) +{ + memset(ip, 0, sizeof(struct br_ip)); + ip->vid = entry->vid; + ip->proto = 
entry->addr.proto; + switch (ip->proto) { + case htons(ETH_P_IP): + ip->dst.ip4 = entry->addr.u.ip4; + if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE]) + ip->src.ip4 = nla_get_in_addr(mdb_attrs[MDBE_ATTR_SOURCE]); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ip->dst.ip6 = entry->addr.u.ip6; + if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE]) + ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]); + break; +#endif + } + +} + +static int __mdb_fill_srcs(struct sk_buff *skb, + struct net_bridge_port_group *p) +{ + struct net_bridge_group_src *ent; + struct nlattr *nest, *nest_ent; + + if (hlist_empty(&p->src_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); + if (!nest) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(ent, &p->src_list, node, + lockdep_is_held(&p->key.port->br->multicast_lock)) { + nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY); + if (!nest_ent) + goto out_cancel_err; + switch (ent->addr.proto) { + case htons(ETH_P_IP): + if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, + ent->addr.src.ip4)) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, + &ent->addr.src.ip6)) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + break; +#endif + default: + nla_nest_cancel(skb, nest_ent); + continue; + } + if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, + br_timer_value(&ent->timer))) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + nla_nest_end(skb, nest_ent); + } + + nla_nest_end(skb, nest); + + return 0; + +out_cancel_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int __mdb_fill_info(struct sk_buff *skb, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *p) +{ + bool dump_srcs_mode = false; + struct timer_list *mtimer; + struct nlattr *nest_ent; + struct br_mdb_entry e; + u8 flags = 0; + int ifindex; + + memset(&e, 0, sizeof(e)); + if (p) { + ifindex = p->key.port->dev->ifindex; + mtimer = &p->timer; + flags = p->flags; + } else { + ifindex = mp->br->dev->ifindex; + mtimer = &mp->timer; + } + + __mdb_entry_fill_flags(&e, flags); + e.ifindex = ifindex; + e.vid = mp->addr.vid; + if (mp->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = mp->addr.dst.ip4; +#if IS_ENABLED(CONFIG_IPV6) + if (mp->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = mp->addr.dst.ip6; +#endif + e.addr.proto = mp->addr.proto; + nest_ent = nla_nest_start_noflag(skb, + MDBA_MDB_ENTRY_INFO); + if (!nest_ent) + return -EMSGSIZE; + + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, + MDBA_MDB_EATTR_TIMER, + br_timer_value(mtimer))) + goto nest_err; + + switch (mp->addr.proto) { + case htons(ETH_P_IP): + dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3); + if (mp->addr.src.ip4) { + if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE, + mp->addr.src.ip4)) + goto nest_err; + break; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + dump_srcs_mode = !!(mp->br->multicast_mld_version == 2); + if (!ipv6_addr_any(&mp->addr.src.ip6)) { + if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE, + &mp->addr.src.ip6)) + goto nest_err; + break; + } + break; +#endif + } + if (p) { + if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol)) + goto nest_err; + if (dump_srcs_mode && + (__mdb_fill_srcs(skb, p) || + nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, + p->filter_mode))) + goto nest_err; + } + nla_nest_end(skb, nest_ent); + + return 0; + +nest_err: + 
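+ /* Undo the partially filled MDBA_MDB_ENTRY_INFO attribute */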
nla_nest_cancel(skb, nest_ent); + return -EMSGSIZE; +} + +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2]; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_entry *mp; + struct nlattr *nest, *nest2; + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return 0; + + nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + + if (idx < s_idx) + goto skip; + + nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (!nest2) { + err = -EMSGSIZE; + break; + } + + if (!s_pidx && mp->host_joined) { + err = __mdb_fill_info(skb, mp, NULL); + if (err) { + nla_nest_cancel(skb, nest2); + break; + } + } + + for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + if (!p->key.port) + continue; + if (pidx < s_pidx) + goto skip_pg; + + err = __mdb_fill_info(skb, mp, p); + if (err) { + nla_nest_end(skb, nest2); + goto out; + } +skip_pg: + pidx++; + } + pidx = 0; + s_pidx = 0; + nla_nest_end(skb, nest2); +skip: + idx++; + } + +out: + cb->args[1] = idx; + cb->args[2] = pidx; + nla_nest_end(skb, nest); + return err; +} + +static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct br_port_msg *bpm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request"); + return -EINVAL; + } + + bpm = nlmsg_data(nlh); + if (bpm->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); + return -EINVAL; + } + + return 0; +} + +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + int idx = 0, s_idx; + + if (cb->strict_check) { + int err = br_mdb_valid_dump_req(cb->nlh, cb->extack); + + if (err < 0) + return err; + } + + s_idx = cb->args[0]; + + rcu_read_lock(); + + cb->seq = net->dev_base_seq; + + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { + struct br_port_msg *bpm; + + if (idx < s_idx) + goto skip; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, + sizeof(*bpm), NLM_F_MULTI); + if (nlh == NULL) + break; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->ifindex = dev->ifindex; + if (br_mdb_fill_info(skb, cb, dev) < 0) + goto out; + if (br_rports_fill_info(skb, cb, dev) < 0) + goto out; + + cb->args[1] = 0; + nlmsg_end(skb, nlh); + skip: + idx++; + } + } + +out: + if (nlh) + nlmsg_end(skb, nlh); + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +static int nlmsg_populate_mdb_fill(struct sk_buff *skb, + struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) +{ + struct nlmsghdr *nlh; + struct br_port_msg *bpm; + struct nlattr *nest, *nest2; + + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (nest == NULL) + goto cancel; + 
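+ /* A single MDBA_MDB_ENTRY nest wrapping one entry info attribute */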
nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) + goto end; + + if (__mdb_fill_info(skb, mp, pg)) + goto end; + + nla_nest_end(skb, nest2); + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + return 0; + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) +{ + size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)) + + nla_total_size(sizeof(u32)); + struct net_bridge_group_src *ent; + size_t addr_size = 0; + + if (!pg) + goto out; + + /* MDBA_MDB_EATTR_RTPROT */ + nlmsg_size += nla_total_size(sizeof(u8)); + + switch (pg->key.addr.proto) { + case htons(ETH_P_IP): + /* MDBA_MDB_EATTR_SOURCE */ + if (pg->key.addr.src.ip4) + nlmsg_size += nla_total_size(sizeof(__be32)); + if (pg->key.port->br->multicast_igmp_version == 2) + goto out; + addr_size = sizeof(__be32); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + /* MDBA_MDB_EATTR_SOURCE */ + if (!ipv6_addr_any(&pg->key.addr.src.ip6)) + nlmsg_size += nla_total_size(sizeof(struct in6_addr)); + if (pg->key.port->br->multicast_mld_version == 1) + goto out; + addr_size = sizeof(struct in6_addr); + break; +#endif + } + + /* MDBA_MDB_EATTR_GROUP_MODE */ + nlmsg_size += nla_total_size(sizeof(u8)); + + /* MDBA_MDB_EATTR_SRC_LIST nested attr */ + if (!hlist_empty(&pg->src_list)) + nlmsg_size += nla_total_size(0); + + hlist_for_each_entry(ent, &pg->src_list, node) { + /* MDBA_MDB_SRCLIST_ENTRY nested attr + + * MDBA_MDB_SRCATTR_ADDRESS + MDBA_MDB_SRCATTR_TIMER + */ + nlmsg_size += nla_total_size(0) + + nla_total_size(addr_size) + + nla_total_size(sizeof(u32)); + } +out: + return nlmsg_size; +} + +struct br_mdb_complete_info { + struct net_bridge_port *port; + struct br_ip ip; +}; + +static void br_mdb_complete(struct net_device *dev, int err, void *priv) +{ + struct br_mdb_complete_info *data = priv; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port *port = data->port; + struct net_bridge *br = port->br; + + if (err) + goto err; + + spin_lock_bh(&br->multicast_lock); + mp = br_mdb_ip_get(br, &data->ip); + if (!mp) + goto out; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->key.port != port) + continue; + p->flags |= MDB_PG_FLAGS_OFFLOAD; + } +out: + spin_unlock_bh(&br->multicast_lock); +err: + kfree(priv); +} + +static void br_mdb_switchdev_host_port(struct net_device *dev, + struct net_device *lower_dev, + struct net_bridge_mdb_entry *mp, + int type) +{ + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_HOST_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = mp->addr.vid, + }; + + if (mp->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = dev; + switch (type) { + case RTM_NEWMDB: + switchdev_port_obj_add(lower_dev, &mdb.obj, NULL); + break; + case RTM_DELMDB: + switchdev_port_obj_del(lower_dev, &mdb.obj); + break; + } +} + +static void br_mdb_switchdev_host(struct net_device *dev, + struct net_bridge_mdb_entry *mp, int type) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) + br_mdb_switchdev_host_port(dev, lower_dev, mp, type); +} + +void br_mdb_notify(struct net_device *dev, + struct 
net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) +{ + struct br_mdb_complete_info *complete_info; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = mp->addr.vid, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + if (pg) { + if (mp->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr); +#endif + mdb.obj.orig_dev = pg->key.port->dev; + switch (type) { + case RTM_NEWMDB: + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (!complete_info) + break; + complete_info->port = pg->key.port; + complete_info->ip = mp->addr; + mdb.obj.complete_priv = complete_info; + mdb.obj.complete = br_mdb_complete; + if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) + kfree(complete_info); + break; + case RTM_DELMDB: + switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); + break; + } + } else { + br_mdb_switchdev_host(dev, mp, type); + } + + skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_mdb_fill(skb, dev, mp, pg, type); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +static int nlmsg_populate_rtr_fill(struct sk_buff *skb, + struct net_device *dev, + int ifindex, u32 pid, + u32 seq, int type, unsigned int flags) +{ + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + struct nlattr *nest; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); + if (!nest) + goto cancel; + + if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex)) + goto end; + + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + return 0; + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t rtnl_rtr_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(__u32)); +} + +void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, + int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + int ifindex; + + ifindex = port ? 
port->dev->ifindex : 0; + skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; + +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +static bool is_valid_mdb_entry(struct br_mdb_entry *entry, + struct netlink_ext_ack *extack) +{ + if (entry->ifindex == 0) { + NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed"); + return false; + } + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast"); + return false; + } + if (ipv4_is_local_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast"); + return false; + } +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { + NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes"); + return false; + } +#endif + } else { + NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol"); + return false; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG_MOD(extack, "Unknown entry state"); + return false; + } + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id"); + return false; + } + + return true; +} + +static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto, + struct netlink_ext_ack *extack) +{ + switch (proto) { + case htons(ETH_P_IP): + if (nla_len(attr) != sizeof(struct in_addr)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length"); + return false; + } + if (ipv4_is_multicast(nla_get_in_addr(attr))) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed"); + return false; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): { + struct in6_addr src; + + if (nla_len(attr) != sizeof(struct in6_addr)) { + NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length"); + return false; + } + src = nla_get_in6_addr(attr); + if (ipv6_addr_is_multicast(&src)) { + NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed"); + return false; + } + break; + } +#endif + default: + NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address"); + return false; + } + + return true; +} + +static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), +}; + +static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net_device **pdev, struct br_mdb_entry **pentry, + struct nlattr **mdb_attrs, struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct br_port_msg *bpm; + struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, NULL, NULL); + if (err < 0) + return err; + + bpm = nlmsg_data(nlh); + if (bpm->ifindex == 0) { + NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (dev == NULL) { + NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); + return -ENODEV; + } + + if (!(dev->priv_flags & IFF_EBRIDGE)) { + NL_SET_ERR_MSG_MOD(extack, "Device is 
not a bridge"); + return -EOPNOTSUPP; + } + + *pdev = dev; + + if (!tb[MDBA_SET_ENTRY]) { + NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); + return -EINVAL; + } + + entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(entry, extack)) + return -EINVAL; + *pentry = entry; + + if (tb[MDBA_SET_ENTRY_ATTRS]) { + err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, + tb[MDBA_SET_ENTRY_ATTRS], + br_mdbe_attrs_pol, extack); + if (err) + return err; + if (mdb_attrs[MDBE_ATTR_SOURCE] && + !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE], + entry->addr.proto, extack)) + return -EINVAL; + } else { + memset(mdb_attrs, 0, + sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1)); + } + + return 0; +} + +static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_mdb_entry *entry, + struct nlattr **mdb_attrs, + struct netlink_ext_ack *extack) +{ + struct net_bridge_mdb_entry *mp, *star_mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip group, star_group; + unsigned long now = jiffies; + u8 filter_mode; + int err; + + __mdb_entry_to_br_ip(entry, &group, mdb_attrs); + + /* host join errors which can happen before creating the group */ + if (!port) { + /* don't allow any flags for host-joined groups */ + if (entry->state) { + NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); + return -EINVAL; + } + if (!br_multicast_is_star_g(&group)) { + NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); + return -EINVAL; + } + } + + mp = br_mdb_ip_get(br, &group); + if (!mp) { + mp = br_multicast_new_group(br, &group); + err = PTR_ERR_OR_ZERO(mp); + if (err) + return err; + } + + /* host join */ + if (!port) { + if (mp->host_joined) { + NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); + return -EEXIST; + } + + br_multicast_host_join(mp, false); + br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB); + + return 0; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->key.port == port) { + NL_SET_ERR_MSG_MOD(extack, "Group is already joined by port"); + return -EEXIST; + } + if ((unsigned long)p->key.port < (unsigned long)port) + break; + } + + filter_mode = br_multicast_is_star_g(&group) ? 
MCAST_EXCLUDE : + MCAST_INCLUDE; + + p = br_multicast_new_port_group(port, &group, *pp, entry->state, NULL, + filter_mode, RTPROT_STATIC); + if (unlikely(!p)) { + NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group"); + return -ENOMEM; + } + rcu_assign_pointer(*pp, p); + if (entry->state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); + br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); + /* if we are adding a new EXCLUDE port group (*,G) it needs to be also + * added to all S,G entries for proper replication, if we are adding + * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be + * added to it for proper replication + */ + if (br_multicast_should_handle_mode(br, group.proto)) { + switch (filter_mode) { + case MCAST_EXCLUDE: + br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE); + break; + case MCAST_INCLUDE: + star_group = p->key.addr; + memset(&star_group.src, 0, sizeof(star_group.src)); + star_mp = br_mdb_ip_get(br, &star_group); + if (star_mp) + br_multicast_sg_add_exclude_ports(star_mp, p); + break; + } + } + + return 0; +} + +static int __br_mdb_add(struct net *net, struct net_bridge *br, + struct net_bridge_port *p, + struct br_mdb_entry *entry, + struct nlattr **mdb_attrs, + struct netlink_ext_ack *extack) +{ + int ret; + + spin_lock_bh(&br->multicast_lock); + ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack); + spin_unlock_bh(&br->multicast_lock); + + return ret; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_device *dev, *pdev; + struct br_mdb_entry *entry; + struct net_bridge_vlan *v; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack); + if (err < 0) + return err; + + br = netdev_priv(dev); + + if (!netif_running(br->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running"); + return -EINVAL; + } + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { + NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled"); + return -EINVAL; + } + + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) { + NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist"); + return -ENODEV; + } + + p = br_port_get_rtnl(pdev); + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port"); + return -EINVAL; + } + + if (p->br != br) { + NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); + return -EINVAL; + } + if (p->state == BR_STATE_DISABLED) { + NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state"); + return -EINVAL; + } + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + + /* If vlan filtering is enabled and VLAN is not specified + * install mdb entry on all vlans configured on the port. 
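+ * A VID of 0 is treated as a wildcard: the add is repeated for each
+ * configured VLAN and stops at the first error.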
+ */ + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; + err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack); + if (err) + break; + } + } else { + err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack); + } + + return err; +} + +static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry, + struct nlattr **mdb_attrs) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip ip; + int err = -EINVAL; + + if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return -EINVAL; + + __mdb_entry_to_br_ip(entry, &ip, mdb_attrs); + + spin_lock_bh(&br->multicast_lock); + mp = br_mdb_ip_get(br, &ip); + if (!mp) + goto unlock; + + /* host leave */ + if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) { + br_multicast_host_leave(mp, false); + err = 0; + br_mdb_notify(br->dev, mp, NULL, RTM_DELMDB); + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + goto unlock; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex) + continue; + + if (p->key.port->state == BR_STATE_DISABLED) + goto unlock; + + br_multicast_del_pg(mp, p, pp); + err = 0; + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} + +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_device *dev, *pdev; + struct br_mdb_entry *entry; + struct net_bridge_vlan *v; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack); + if (err < 0) + return err; + + br = netdev_priv(dev); + + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) + return -ENODEV; + + p = br_port_get_rtnl(pdev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + + /* If vlan filtering is enabled and VLAN is not specified + * delete mdb entry on all vlans configured on the port. 
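+ * A VID of 0 is again a wildcard: the delete is attempted on every
+ * configured VLAN and the result of the last attempt is returned.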
+ */ + if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; + err = __br_mdb_del(br, entry, mdb_attrs); + } + } else { + err = __br_mdb_del(br, entry, mdb_attrs); + } + + return err; +} + +void br_mdb_init(void) +{ + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); +} + +void br_mdb_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETMDB); + rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); + rtnl_unregister(PF_BRIDGE, RTM_DELMDB); +} diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c new file mode 100644 index 000000000..3259f5480 --- /dev/null +++ b/net/bridge/br_mrp.c @@ -0,0 +1,1192 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/mrp_bridge.h> +#include "br_private_mrp.h" + +static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 }; +static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x3 }; + +static bool br_mrp_is_ring_port(struct net_bridge_port *p_port, + struct net_bridge_port *s_port, + struct net_bridge_port *port) +{ + if (port == p_port || + port == s_port) + return true; + + return false; +} + +static bool br_mrp_is_in_port(struct net_bridge_port *i_port, + struct net_bridge_port *port) +{ + if (port == i_port) + return true; + + return false; +} + +static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br, + u32 ifindex) +{ + struct net_bridge_port *res = NULL; + struct net_bridge_port *port; + + list_for_each_entry(port, &br->port_list, list) { + if (port->dev->ifindex == ifindex) { + res = port; + break; + } + } + + return res; +} + +static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) +{ + struct br_mrp *res = NULL; + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + if (mrp->ring_id == ring_id) { + res = mrp; + break; + } + } + + return res; +} + +static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id) +{ + struct br_mrp *res = NULL; + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + if (mrp->in_id == in_id) { + res = mrp; + break; + } + } + + return res; +} + +static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) +{ + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + struct net_bridge_port *p; + + p = rtnl_dereference(mrp->p_port); + if (p && p->dev->ifindex == ifindex) + return false; + + p = rtnl_dereference(mrp->s_port); + if (p && p->dev->ifindex == ifindex) + return false; + + p = rtnl_dereference(mrp->i_port); + if (p && p->dev->ifindex == ifindex) + return false; + } + + return true; +} + +static struct br_mrp *br_mrp_find_port(struct net_bridge *br, + struct net_bridge_port *p) +{ + struct br_mrp *res = NULL; + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + if (rcu_access_pointer(mrp->p_port) == p || + rcu_access_pointer(mrp->s_port) == p || + rcu_access_pointer(mrp->i_port) == p) { + res = mrp; + break; + } + } + + return res; +} + +static int br_mrp_next_seq(struct br_mrp *mrp) +{ + mrp->seq_id++; + return mrp->seq_id; +} + +static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p, + const u8 *src, const u8 *dst) +{ + struct ethhdr *eth_hdr; + struct 
sk_buff *skb; + __be16 *version; + + skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH); + if (!skb) + return NULL; + + skb->dev = p->dev; + skb->protocol = htons(ETH_P_MRP); + skb->priority = MRP_FRAME_PRIO; + skb_reserve(skb, sizeof(*eth_hdr)); + + eth_hdr = skb_push(skb, sizeof(*eth_hdr)); + ether_addr_copy(eth_hdr->h_dest, dst); + ether_addr_copy(eth_hdr->h_source, src); + eth_hdr->h_proto = htons(ETH_P_MRP); + + version = skb_put(skb, sizeof(*version)); + *version = cpu_to_be16(MRP_VERSION); + + return skb; +} + +static void br_mrp_skb_tlv(struct sk_buff *skb, + enum br_mrp_tlv_header_type type, + u8 length) +{ + struct br_mrp_tlv_hdr *hdr; + + hdr = skb_put(skb, sizeof(*hdr)); + hdr->type = type; + hdr->length = length; +} + +static void br_mrp_skb_common(struct sk_buff *skb, struct br_mrp *mrp) +{ + struct br_mrp_common_hdr *hdr; + + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_COMMON, sizeof(*hdr)); + + hdr = skb_put(skb, sizeof(*hdr)); + hdr->seq_id = cpu_to_be16(br_mrp_next_seq(mrp)); + memset(hdr->domain, 0xff, MRP_DOMAIN_UUID_LENGTH); +} + +static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp, + struct net_bridge_port *p, + enum br_mrp_port_role_type port_role) +{ + struct br_mrp_ring_test_hdr *hdr = NULL; + struct sk_buff *skb = NULL; + + if (!p) + return NULL; + + skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_test_dmac); + if (!skb) + return NULL; + + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_RING_TEST, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); + + hdr->prio = cpu_to_be16(mrp->prio); + ether_addr_copy(hdr->sa, p->br->dev->dev_addr); + hdr->port_role = cpu_to_be16(port_role); + hdr->state = cpu_to_be16(mrp->ring_state); + hdr->transitions = cpu_to_be16(mrp->ring_transitions); + hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies)); + + br_mrp_skb_common(skb, mrp); + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0); + + return skb; +} + +static struct sk_buff *br_mrp_alloc_in_test_skb(struct br_mrp *mrp, + struct net_bridge_port *p, + enum br_mrp_port_role_type port_role) +{ + struct br_mrp_in_test_hdr *hdr = NULL; + struct sk_buff *skb = NULL; + + if (!p) + return NULL; + + skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_in_test_dmac); + if (!skb) + return NULL; + + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_IN_TEST, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); + + hdr->id = cpu_to_be16(mrp->in_id); + ether_addr_copy(hdr->sa, p->br->dev->dev_addr); + hdr->port_role = cpu_to_be16(port_role); + hdr->state = cpu_to_be16(mrp->in_state); + hdr->transitions = cpu_to_be16(mrp->in_transitions); + hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies)); + + br_mrp_skb_common(skb, mrp); + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0); + + return skb; +} + +/* This function is continuously called in the following cases: + * - when node role is MRM, in this case test_monitor is always set to false + * because it needs to notify the userspace that the ring is open and needs to + * send MRP_Test frames + * - when node role is MRA, there are 2 subcases: + * - when MRA behaves as MRM, in this case is similar with MRM role + * - when MRA behaves as MRC, in this case test_monitor is set to true, + * because it needs to detect when it stops seeing MRP_Test frames + * from MRM node but it doesn't need to send MRP_Test frames. 
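+ * The work re-arms itself every test_interval until test_end is reached, and
+ * the miss counter is cleared from the receive path whenever a valid
+ * MRP_Test frame is seen.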
+ */ +static void br_mrp_test_work_expired(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct br_mrp *mrp = container_of(del_work, struct br_mrp, test_work); + struct net_bridge_port *p; + bool notify_open = false; + struct sk_buff *skb; + + if (time_before_eq(mrp->test_end, jiffies)) + return; + + if (mrp->test_count_miss < mrp->test_max_miss) { + mrp->test_count_miss++; + } else { + /* Notify that the ring is open only if the ring state is + * closed, otherwise it would continue to notify at every + * interval. + * Also notify that the ring is open when the node has the + * role MRA and behaves as MRC. The reason is that the + * userspace needs to know when the MRM stopped sending + * MRP_Test frames so that the current node to try to take + * the role of a MRM. + */ + if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED || + mrp->test_monitor) + notify_open = true; + } + + rcu_read_lock(); + + p = rcu_dereference(mrp->p_port); + if (p) { + if (!mrp->test_monitor) { + skb = br_mrp_alloc_test_skb(mrp, p, + BR_MRP_PORT_ROLE_PRIMARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + } + + if (notify_open && !mrp->ring_role_offloaded) + br_mrp_ring_port_open(p->dev, true); + } + + p = rcu_dereference(mrp->s_port); + if (p) { + if (!mrp->test_monitor) { + skb = br_mrp_alloc_test_skb(mrp, p, + BR_MRP_PORT_ROLE_SECONDARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + } + + if (notify_open && !mrp->ring_role_offloaded) + br_mrp_ring_port_open(p->dev, true); + } + +out: + rcu_read_unlock(); + + queue_delayed_work(system_wq, &mrp->test_work, + usecs_to_jiffies(mrp->test_interval)); +} + +/* This function is continuously called when the node has the interconnect role + * MIM. It would generate interconnect test frames and will send them on all 3 + * ports. But will also check if it stop receiving interconnect test frames. + */ +static void br_mrp_in_test_work_expired(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct br_mrp *mrp = container_of(del_work, struct br_mrp, in_test_work); + struct net_bridge_port *p; + bool notify_open = false; + struct sk_buff *skb; + + if (time_before_eq(mrp->in_test_end, jiffies)) + return; + + if (mrp->in_test_count_miss < mrp->in_test_max_miss) { + mrp->in_test_count_miss++; + } else { + /* Notify that the interconnect ring is open only if the + * interconnect ring state is closed, otherwise it would + * continue to notify at every interval. 
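+ * The miss counter is cleared by br_mrp_mim_process() when the node's own
+ * MRP_InTest frame is received back.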
+ */ + if (mrp->in_state == BR_MRP_IN_STATE_CLOSED) + notify_open = true; + } + + rcu_read_lock(); + + p = rcu_dereference(mrp->p_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_PRIMARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + + p = rcu_dereference(mrp->s_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_SECONDARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + + p = rcu_dereference(mrp->i_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_INTER); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + +out: + rcu_read_unlock(); + + queue_delayed_work(system_wq, &mrp->in_test_work, + usecs_to_jiffies(mrp->in_test_interval)); +} + +/* Deletes the MRP instance. + * note: called under rtnl_lock + */ +static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) +{ + struct net_bridge_port *p; + u8 state; + + /* Stop sending MRP_Test frames */ + cancel_delayed_work_sync(&mrp->test_work); + br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0, 0); + + /* Stop sending MRP_InTest frames if has an interconnect role */ + cancel_delayed_work_sync(&mrp->in_test_work); + br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); + + br_mrp_switchdev_del(br, mrp); + + /* Reset the ports */ + p = rtnl_dereference(mrp->p_port); + if (p) { + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->p_port, NULL); + } + + p = rtnl_dereference(mrp->s_port); + if (p) { + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->s_port, NULL); + } + + p = rtnl_dereference(mrp->i_port); + if (p) { + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->i_port, NULL); + } + + list_del_rcu(&mrp->list); + kfree_rcu(mrp, rcu); +} + +/* Adds a new MRP instance. 
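+ * Both ring ports must exist and must not already be part of another MRP
+ * instance.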
+ * note: called under rtnl_lock + */ +int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) +{ + struct net_bridge_port *p; + struct br_mrp *mrp; + int err; + + /* If the ring exists, it is not possible to create another one with the + * same ring_id + */ + mrp = br_mrp_find_id(br, instance->ring_id); + if (mrp) + return -EINVAL; + + if (!br_mrp_get_port(br, instance->p_ifindex) || + !br_mrp_get_port(br, instance->s_ifindex)) + return -EINVAL; + + /* It is not possible to have the same port part of multiple rings */ + if (!br_mrp_unique_ifindex(br, instance->p_ifindex) || + !br_mrp_unique_ifindex(br, instance->s_ifindex)) + return -EINVAL; + + mrp = kzalloc(sizeof(*mrp), GFP_KERNEL); + if (!mrp) + return -ENOMEM; + + mrp->ring_id = instance->ring_id; + mrp->prio = instance->prio; + + p = br_mrp_get_port(br, instance->p_ifindex); + spin_lock_bh(&br->lock); + p->state = BR_STATE_FORWARDING; + p->flags |= BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + rcu_assign_pointer(mrp->p_port, p); + + p = br_mrp_get_port(br, instance->s_ifindex); + spin_lock_bh(&br->lock); + p->state = BR_STATE_FORWARDING; + p->flags |= BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + rcu_assign_pointer(mrp->s_port, p); + + INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired); + INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired); + list_add_tail_rcu(&mrp->list, &br->mrp_list); + + err = br_mrp_switchdev_add(br, mrp); + if (err) + goto delete_mrp; + + return 0; + +delete_mrp: + br_mrp_del_impl(br, mrp); + + return err; +} + +/* Deletes the MRP instance from which the port is part of + * note: called under rtnl_lock + */ +void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p) +{ + struct br_mrp *mrp = br_mrp_find_port(br, p); + + /* If the port is not part of a MRP instance just bail out */ + if (!mrp) + return; + + br_mrp_del_impl(br, mrp); +} + +/* Deletes existing MRP instance based on ring_id + * note: called under rtnl_lock + */ +int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance) +{ + struct br_mrp *mrp = br_mrp_find_id(br, instance->ring_id); + + if (!mrp) + return -EINVAL; + + br_mrp_del_impl(br, mrp); + + return 0; +} + +/* Set port state, port state can be forwarding, blocked or disabled + * note: already called with rtnl_lock + */ +int br_mrp_set_port_state(struct net_bridge_port *p, + enum br_mrp_port_state_type state) +{ + u32 port_state; + + if (!p || !(p->flags & BR_MRP_AWARE)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + + if (state == BR_MRP_PORT_STATE_FORWARDING) + port_state = BR_STATE_FORWARDING; + else + port_state = BR_STATE_BLOCKING; + + p->state = port_state; + spin_unlock_bh(&p->br->lock); + + br_mrp_port_switchdev_set_state(p, port_state); + + return 0; +} + +/* Set port role, port role can be primary or secondary + * note: already called with rtnl_lock + */ +int br_mrp_set_port_role(struct net_bridge_port *p, + enum br_mrp_port_role_type role) +{ + struct br_mrp *mrp; + + if (!p || !(p->flags & BR_MRP_AWARE)) + return -EINVAL; + + mrp = br_mrp_find_port(p->br, p); + + if (!mrp) + return -EINVAL; + + switch (role) { + case BR_MRP_PORT_ROLE_PRIMARY: + rcu_assign_pointer(mrp->p_port, p); + break; + case BR_MRP_PORT_ROLE_SECONDARY: + rcu_assign_pointer(mrp->s_port, p); + break; + default: + return -EINVAL; + } + + br_mrp_port_switchdev_set_role(p, role); + + return 0; +} + +/* Set ring state, ring state can be only Open or Closed + * note: already called with rtnl_lock + */ +int br_mrp_set_ring_state(struct net_bridge 
*br, + struct br_mrp_ring_state *state) +{ + struct br_mrp *mrp = br_mrp_find_id(br, state->ring_id); + + if (!mrp) + return -EINVAL; + + if (mrp->ring_state != state->ring_state) + mrp->ring_transitions++; + + mrp->ring_state = state->ring_state; + + br_mrp_switchdev_set_ring_state(br, mrp, state->ring_state); + + return 0; +} + +/* Set ring role, ring role can be only MRM(Media Redundancy Manager) or + * MRC(Media Redundancy Client). + * note: already called with rtnl_lock + */ +int br_mrp_set_ring_role(struct net_bridge *br, + struct br_mrp_ring_role *role) +{ + struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id); + int err; + + if (!mrp) + return -EINVAL; + + mrp->ring_role = role->ring_role; + + /* If there is an error just bailed out */ + err = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role); + if (err && err != -EOPNOTSUPP) + return err; + + /* Now detect if the HW actually applied the role or not. If the HW + * applied the role it means that the SW will not to do those operations + * anymore. For example if the role ir MRM then the HW will notify the + * SW when ring is open, but if the is not pushed to the HW the SW will + * need to detect when the ring is open + */ + mrp->ring_role_offloaded = err == -EOPNOTSUPP ? 0 : 1; + + return 0; +} + +/* Start to generate or monitor MRP test frames, the frames are generated by + * HW and if it fails, they are generated by the SW. + * note: already called with rtnl_lock + */ +int br_mrp_start_test(struct net_bridge *br, + struct br_mrp_start_test *test) +{ + struct br_mrp *mrp = br_mrp_find_id(br, test->ring_id); + + if (!mrp) + return -EINVAL; + + /* Try to push it to the HW and if it fails then continue with SW + * implementation and if that also fails then return error. + */ + if (!br_mrp_switchdev_send_ring_test(br, mrp, test->interval, + test->max_miss, test->period, + test->monitor)) + return 0; + + mrp->test_interval = test->interval; + mrp->test_end = jiffies + usecs_to_jiffies(test->period); + mrp->test_max_miss = test->max_miss; + mrp->test_monitor = test->monitor; + mrp->test_count_miss = 0; + queue_delayed_work(system_wq, &mrp->test_work, + usecs_to_jiffies(test->interval)); + + return 0; +} + +/* Set in state, int state can be only Open or Closed + * note: already called with rtnl_lock + */ +int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state) +{ + struct br_mrp *mrp = br_mrp_find_in_id(br, state->in_id); + + if (!mrp) + return -EINVAL; + + if (mrp->in_state != state->in_state) + mrp->in_transitions++; + + mrp->in_state = state->in_state; + + br_mrp_switchdev_set_in_state(br, mrp, state->in_state); + + return 0; +} + +/* Set in role, in role can be only MIM(Media Interconnection Manager) or + * MIC(Media Interconnection Client). + * note: already called with rtnl_lock + */ +int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role) +{ + struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id); + struct net_bridge_port *p; + int err; + + if (!mrp) + return -EINVAL; + + if (!br_mrp_get_port(br, role->i_ifindex)) + return -EINVAL; + + if (role->in_role == BR_MRP_IN_ROLE_DISABLED) { + u8 state; + + /* It is not allowed to disable a port that doesn't exist */ + p = rtnl_dereference(mrp->i_port); + if (!p) + return -EINVAL; + + /* Stop the generating MRP_InTest frames */ + cancel_delayed_work_sync(&mrp->in_test_work); + br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); + + /* Remove the port */ + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? 
+ BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->i_port, NULL); + + mrp->in_role = role->in_role; + mrp->in_id = 0; + + return 0; + } + + /* It is not possible to have the same port part of multiple rings */ + if (!br_mrp_unique_ifindex(br, role->i_ifindex)) + return -EINVAL; + + /* It is not allowed to set a different interconnect port if the mrp + * instance has already one. First it needs to be disabled and after + * that set the new port + */ + if (rcu_access_pointer(mrp->i_port)) + return -EINVAL; + + p = br_mrp_get_port(br, role->i_ifindex); + spin_lock_bh(&br->lock); + p->state = BR_STATE_FORWARDING; + p->flags |= BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + rcu_assign_pointer(mrp->i_port, p); + + mrp->in_role = role->in_role; + mrp->in_id = role->in_id; + + /* If there is an error just bailed out */ + err = br_mrp_switchdev_set_in_role(br, mrp, role->in_id, + role->ring_id, role->in_role); + if (err && err != -EOPNOTSUPP) + return err; + + /* Now detect if the HW actually applied the role or not. If the HW + * applied the role it means that the SW will not to do those operations + * anymore. For example if the role is MIM then the HW will notify the + * SW when interconnect ring is open, but if the is not pushed to the HW + * the SW will need to detect when the interconnect ring is open. + */ + mrp->in_role_offloaded = err == -EOPNOTSUPP ? 0 : 1; + + return 0; +} + +/* Start to generate MRP_InTest frames, the frames are generated by + * HW and if it fails, they are generated by the SW. + * note: already called with rtnl_lock + */ +int br_mrp_start_in_test(struct net_bridge *br, + struct br_mrp_start_in_test *in_test) +{ + struct br_mrp *mrp = br_mrp_find_in_id(br, in_test->in_id); + + if (!mrp) + return -EINVAL; + + if (mrp->in_role != BR_MRP_IN_ROLE_MIM) + return -EINVAL; + + /* Try to push it to the HW and if it fails then continue with SW + * implementation and if that also fails then return error. 
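+ * In the SW fallback the MRP_InTest generation is driven by the delayed
+ * work queued below.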
+ */ + if (!br_mrp_switchdev_send_in_test(br, mrp, in_test->interval, + in_test->max_miss, in_test->period)) + return 0; + + mrp->in_test_interval = in_test->interval; + mrp->in_test_end = jiffies + usecs_to_jiffies(in_test->period); + mrp->in_test_max_miss = in_test->max_miss; + mrp->in_test_count_miss = 0; + queue_delayed_work(system_wq, &mrp->in_test_work, + usecs_to_jiffies(in_test->interval)); + + return 0; +} + +/* Determin if the frame type is a ring frame */ +static bool br_mrp_ring_frame(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + if (hdr->type == BR_MRP_TLV_HEADER_RING_TEST || + hdr->type == BR_MRP_TLV_HEADER_RING_TOPO || + hdr->type == BR_MRP_TLV_HEADER_RING_LINK_DOWN || + hdr->type == BR_MRP_TLV_HEADER_RING_LINK_UP || + hdr->type == BR_MRP_TLV_HEADER_OPTION) + return true; + + return false; +} + +/* Determin if the frame type is an interconnect frame */ +static bool br_mrp_in_frame(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST || + hdr->type == BR_MRP_TLV_HEADER_IN_TOPO || + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN || + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP) + return true; + + return false; +} + +/* Process only MRP Test frame. All the other MRP frames are processed by + * userspace application + * note: already called with rcu_read_lock + */ +static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port, + struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. + */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return; + + if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST) + return; + + mrp->test_count_miss = 0; + + /* Notify the userspace that the ring is closed only when the ring is + * not closed + */ + if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED) + br_mrp_ring_port_open(port->dev, false); +} + +/* Determin if the test hdr has a better priority than the node */ +static bool br_mrp_test_better_than_own(struct br_mrp *mrp, + struct net_bridge *br, + const struct br_mrp_ring_test_hdr *hdr) +{ + u16 prio = be16_to_cpu(hdr->prio); + + if (prio < mrp->prio || + (prio == mrp->prio && + ether_addr_to_u64(hdr->sa) < ether_addr_to_u64(br->dev->dev_addr))) + return true; + + return false; +} + +/* Process only MRP Test frame. All the other MRP frames are processed by + * userspace application + * note: already called with rcu_read_lock + */ +static void br_mrp_mra_process(struct br_mrp *mrp, struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + const struct br_mrp_ring_test_hdr *test_hdr; + struct br_mrp_ring_test_hdr _test_hdr; + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. 
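+ * This path is only taken when an MRA node behaves as MRC; forwarding of
+ * the frame is handled by the caller.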
+ */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return; + + if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST) + return; + + test_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr), + sizeof(_test_hdr), &_test_hdr); + if (!test_hdr) + return; + + /* Only frames that have a better priority than the node will + * clear the miss counter because otherwise the node will need to behave + * as MRM. + */ + if (br_mrp_test_better_than_own(mrp, br, test_hdr)) + mrp->test_count_miss = 0; +} + +/* Process only MRP InTest frame. All the other MRP frames are processed by + * userspace application + * note: already called with rcu_read_lock + */ +static bool br_mrp_mim_process(struct br_mrp *mrp, struct net_bridge_port *port, + struct sk_buff *skb) +{ + const struct br_mrp_in_test_hdr *in_hdr; + struct br_mrp_in_test_hdr _in_hdr; + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. + */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + /* The check for InTest frame type was already done */ + in_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr), + sizeof(_in_hdr), &_in_hdr); + if (!in_hdr) + return false; + + /* It needs to process only it's own InTest frames. */ + if (mrp->in_id != ntohs(in_hdr->id)) + return false; + + mrp->in_test_count_miss = 0; + + /* Notify the userspace that the ring is closed only when the ring is + * not closed + */ + if (mrp->in_state != BR_MRP_IN_STATE_CLOSED) + br_mrp_in_port_open(port->dev, false); + + return true; +} + +/* Get the MRP frame type + * note: already called with rcu_read_lock + */ +static u8 br_mrp_get_frame_type(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. 
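+ * Returns 0xff when the TLV header cannot be read.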
+ */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return 0xff; + + return hdr->type; +} + +static bool br_mrp_mrm_behaviour(struct br_mrp *mrp) +{ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRM || + (mrp->ring_role == BR_MRP_RING_ROLE_MRA && !mrp->test_monitor)) + return true; + + return false; +} + +static bool br_mrp_mrc_behaviour(struct br_mrp *mrp) +{ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRC || + (mrp->ring_role == BR_MRP_RING_ROLE_MRA && mrp->test_monitor)) + return true; + + return false; +} + +/* This will just forward the frame to the other mrp ring ports, depending on + * the frame type, ring role and interconnect role + * note: already called with rcu_read_lock + */ +static int br_mrp_rcv(struct net_bridge_port *p, + struct sk_buff *skb, struct net_device *dev) +{ + struct net_bridge_port *p_port, *s_port, *i_port = NULL; + struct net_bridge_port *p_dst, *s_dst, *i_dst = NULL; + struct net_bridge *br; + struct br_mrp *mrp; + + /* If port is disabled don't accept any frames */ + if (p->state == BR_STATE_DISABLED) + return 0; + + br = p->br; + mrp = br_mrp_find_port(br, p); + if (unlikely(!mrp)) + return 0; + + p_port = rcu_dereference(mrp->p_port); + if (!p_port) + return 0; + p_dst = p_port; + + s_port = rcu_dereference(mrp->s_port); + if (!s_port) + return 0; + s_dst = s_port; + + /* If the frame is a ring frame then it is not required to check the + * interconnect role and ports to process or forward the frame + */ + if (br_mrp_ring_frame(skb)) { + /* If the role is MRM then don't forward the frames */ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) { + br_mrp_mrm_process(mrp, p, skb); + goto no_forward; + } + + /* If the role is MRA then don't forward the frames if it + * behaves as MRM node + */ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) { + if (!mrp->test_monitor) { + br_mrp_mrm_process(mrp, p, skb); + goto no_forward; + } + + br_mrp_mra_process(mrp, br, p, skb); + } + + goto forward; + } + + if (br_mrp_in_frame(skb)) { + u8 in_type = br_mrp_get_frame_type(skb); + + i_port = rcu_dereference(mrp->i_port); + i_dst = i_port; + + /* If the ring port is in block state it should not forward + * In_Test frames + */ + if (br_mrp_is_ring_port(p_port, s_port, p) && + p->state == BR_STATE_BLOCKING && + in_type == BR_MRP_TLV_HEADER_IN_TEST) + goto no_forward; + + /* Nodes that behaves as MRM needs to stop forwarding the + * frames in case the ring is closed, otherwise will be a loop. + * In this case the frame is no forward between the ring ports. 
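+ * (p_dst and s_dst are cleared below, so the frame can at most reach the
+ * interconnect port.)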
+ */ + if (br_mrp_mrm_behaviour(mrp) && + br_mrp_is_ring_port(p_port, s_port, p) && + (s_port->state != BR_STATE_FORWARDING || + p_port->state != BR_STATE_FORWARDING)) { + p_dst = NULL; + s_dst = NULL; + } + + /* A node that behaves as MRC and doesn't have a interconnect + * role then it should forward all frames between the ring ports + * because it doesn't have an interconnect port + */ + if (br_mrp_mrc_behaviour(mrp) && + mrp->in_role == BR_MRP_IN_ROLE_DISABLED) + goto forward; + + if (mrp->in_role == BR_MRP_IN_ROLE_MIM) { + if (in_type == BR_MRP_TLV_HEADER_IN_TEST) { + /* MIM should not forward it's own InTest + * frames + */ + if (br_mrp_mim_process(mrp, p, skb)) { + goto no_forward; + } else { + if (br_mrp_is_ring_port(p_port, s_port, + p)) + i_dst = NULL; + + if (br_mrp_is_in_port(i_port, p)) + goto no_forward; + } + } else { + /* MIM should forward IntLinkChange and + * IntTopoChange between ring ports but MIM + * should not forward IntLinkChange and + * IntTopoChange if the frame was received at + * the interconnect port + */ + if (br_mrp_is_ring_port(p_port, s_port, p)) + i_dst = NULL; + + if (br_mrp_is_in_port(i_port, p)) + goto no_forward; + } + } + + if (mrp->in_role == BR_MRP_IN_ROLE_MIC) { + /* MIC should forward InTest frames on all ports + * regardless of the received port + */ + if (in_type == BR_MRP_TLV_HEADER_IN_TEST) + goto forward; + + /* MIC should forward IntLinkChange frames only if they + * are received on ring ports to all the ports + */ + if (br_mrp_is_ring_port(p_port, s_port, p) && + (in_type == BR_MRP_TLV_HEADER_IN_LINK_UP || + in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN)) + goto forward; + + /* Should forward the InTopo frames only between the + * ring ports + */ + if (in_type == BR_MRP_TLV_HEADER_IN_TOPO) { + i_dst = NULL; + goto forward; + } + + /* In all the other cases don't forward the frames */ + goto no_forward; + } + } + +forward: + if (p_dst) + br_forward(p_dst, skb, true, false); + if (s_dst) + br_forward(s_dst, skb, true, false); + if (i_dst) + br_forward(i_dst, skb, true, false); + +no_forward: + return 1; +} + +/* Check if the frame was received on a port that is part of MRP ring + * and if the frame has MRP eth. In that case process the frame otherwise do + * normal forwarding. 
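+ * A non-zero return value indicates the frame was handled by the MRP code here; zero lets the bridge carry on with its normal forwarding path.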
+ * note: already called with rcu_read_lock + */ +int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) +{ + /* If there is no MRP instance do normal forwarding */ + if (likely(!(p->flags & BR_MRP_AWARE))) + goto out; + + if (unlikely(skb->protocol == htons(ETH_P_MRP))) + return br_mrp_rcv(p, skb, p->dev); + +out: + return 0; +} + +bool br_mrp_enabled(struct net_bridge *br) +{ + return !list_empty(&br->mrp_list); +} diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c new file mode 100644 index 000000000..2a2fdf350 --- /dev/null +++ b/net/bridge/br_mrp_netlink.c @@ -0,0 +1,571 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <net/genetlink.h> + +#include <uapi/linux/mrp_bridge.h> +#include "br_private.h" +#include "br_private_mrp.h" + +static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = { + [IFLA_BRIDGE_MRP_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_IN_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_IN_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_START_IN_TEST] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = { + [IFLA_BRIDGE_MRP_INSTANCE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_INSTANCE_PRIO] = { .type = NLA_U16 }, +}; + +static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr, + int cmd, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1]; + struct br_mrp_instance inst; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_INSTANCE_MAX, attr, + br_mrp_instance_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] || + !tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or P_IFINDEX or S_IFINDEX"); + return -EINVAL; + } + + memset(&inst, 0, sizeof(inst)); + + inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]); + inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]); + inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]); + inst.prio = MRP_DEFAULT_PRIO; + + if (tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO]) + inst.prio = nla_get_u16(tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO]); + + if (cmd == RTM_SETLINK) + return br_mrp_add(br, &inst); + else + return br_mrp_del(br, &inst); + + return 0; +} + +static const struct nla_policy +br_mrp_port_state_policy[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_port_state_parse(struct net_bridge_port *p, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1]; + enum br_mrp_port_state_type state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_STATE_MAX, attr, + br_mrp_port_state_policy, extack); + if (err) + return err; + + if 
(!tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing attribute: STATE"); + return -EINVAL; + } + + state = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]); + + return br_mrp_set_port_state(p, state); +} + +static const struct nla_policy +br_mrp_port_role_policy[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = { .type = NLA_U32 }, +}; + +static int br_mrp_port_role_parse(struct net_bridge_port *p, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1]; + enum br_mrp_port_role_type role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_ROLE_MAX, attr, + br_mrp_port_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ROLE"); + return -EINVAL; + } + + role = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]); + + return br_mrp_set_port_role(p, role); +} + +static const struct nla_policy +br_mrp_ring_state_policy[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_RING_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_RING_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_ring_state_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1]; + struct br_mrp_ring_state state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_STATE_MAX, attr, + br_mrp_ring_state_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or STATE"); + return -EINVAL; + } + + memset(&state, 0x0, sizeof(state)); + + state.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]); + state.ring_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]); + + return br_mrp_set_ring_state(br, &state); +} + +static const struct nla_policy +br_mrp_ring_role_policy[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = { .type = NLA_U32 }, +}; + +static int br_mrp_ring_role_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1]; + struct br_mrp_ring_role role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_ROLE_MAX, attr, + br_mrp_ring_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or ROLE"); + return -EINVAL; + } + + memset(&role, 0x0, sizeof(role)); + + role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]); + role.ring_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]); + + return br_mrp_set_ring_role(br, &role); +} + +static const struct nla_policy +br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { + [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, 
+ [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, +}; + +static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_START_TEST_MAX + 1]; + struct br_mrp_start_test test; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_TEST_MAX, attr, + br_mrp_start_test_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID] || + !tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL] || + !tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] || + !tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); + return -EINVAL; + } + + memset(&test, 0x0, sizeof(test)); + + test.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID]); + test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]); + test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]); + test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]); + test.monitor = false; + + if (tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR]) + test.monitor = + nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR]); + + return br_mrp_start_test(br, &test); +} + +static const struct nla_policy +br_mrp_in_state_policy[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_IN_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_in_state_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1]; + struct br_mrp_in_state state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_STATE_MAX, attr, + br_mrp_in_state_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID] || + !tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: IN_ID or STATE"); + return -EINVAL; + } + + memset(&state, 0x0, sizeof(state)); + + state.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID]); + state.in_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]); + + return br_mrp_set_in_state(br, &state); +} + +static const struct nla_policy +br_mrp_in_role_policy[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = { .type = NLA_U32 }, +}; + +static int br_mrp_in_role_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1]; + struct br_mrp_in_role role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_ROLE_MAX, attr, + br_mrp_in_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or ROLE or IN_ID or I_IFINDEX"); + return -EINVAL; + } + + memset(&role, 0x0, sizeof(role)); + + role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]); + role.in_id = nla_get_u16(tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID]); + role.i_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX]); + role.in_role = 
nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]); + + return br_mrp_set_in_role(br, &role); +} + +static const struct nla_policy +br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { + [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, +}; + +static int br_mrp_start_in_test_parse(struct net_bridge *br, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1]; + struct br_mrp_start_in_test test; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_IN_TEST_MAX, attr, + br_mrp_start_in_test_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); + return -EINVAL; + } + + memset(&test, 0x0, sizeof(test)); + + test.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID]); + test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL]); + test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS]); + test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]); + + return br_mrp_start_in_test(br, &test); +} + +int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, + struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1]; + int err; + + /* When this function is called for a port then the br pointer is + * invalid, therefor set the br to point correctly + */ + if (p) + br = p->br; + + if (br->stp_enabled != BR_NO_STP) { + NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled"); + return -EINVAL; + } + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_MAX, attr, + br_mrp_policy, extack); + if (err) + return err; + + if (tb[IFLA_BRIDGE_MRP_INSTANCE]) { + err = br_mrp_instance_parse(br, tb[IFLA_BRIDGE_MRP_INSTANCE], + cmd, extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) { + err = br_mrp_port_state_parse(p, tb[IFLA_BRIDGE_MRP_PORT_STATE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) { + err = br_mrp_port_role_parse(p, tb[IFLA_BRIDGE_MRP_PORT_ROLE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_RING_STATE]) { + err = br_mrp_ring_state_parse(br, + tb[IFLA_BRIDGE_MRP_RING_STATE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) { + err = br_mrp_ring_role_parse(br, tb[IFLA_BRIDGE_MRP_RING_ROLE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_START_TEST]) { + err = br_mrp_start_test_parse(br, + tb[IFLA_BRIDGE_MRP_START_TEST], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_IN_STATE]) { + err = br_mrp_in_state_parse(br, tb[IFLA_BRIDGE_MRP_IN_STATE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_IN_ROLE]) { + err = br_mrp_in_role_parse(br, tb[IFLA_BRIDGE_MRP_IN_ROLE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_START_IN_TEST]) { + err = br_mrp_start_in_test_parse(br, + tb[IFLA_BRIDGE_MRP_START_IN_TEST], + extack); + if (err) + return err; + } + + return 0; +} + +int 
br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) +{ + struct nlattr *tb, *mrp_tb; + struct br_mrp *mrp; + + mrp_tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP); + if (!mrp_tb) + return -EMSGSIZE; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list) { + struct net_bridge_port *p; + + tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO); + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ID, + mrp->ring_id)) + goto nla_put_failure; + + p = rcu_dereference(mrp->p_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + + p = rcu_dereference(mrp->s_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + + p = rcu_dereference(mrp->i_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO, + mrp->prio)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_STATE, + mrp->ring_state)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ROLE, + mrp->ring_role)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + mrp->test_interval)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + mrp->test_max_miss)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + mrp->test_monitor)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_STATE, + mrp->in_state)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_ROLE, + mrp->in_role)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + mrp->in_test_interval)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, + mrp->in_test_max_miss)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + } + nla_nest_end(skb, mrp_tb); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tb); + +nla_info_failure: + nla_nest_cancel(skb, mrp_tb); + + return -EMSGSIZE; +} + +int br_mrp_ring_port_open(struct net_device *dev, u8 loc) +{ + struct net_bridge_port *p; + int err = 0; + + p = br_port_get_rcu(dev); + if (!p) { + err = -EINVAL; + goto out; + } + + if (loc) + p->flags |= BR_MRP_LOST_CONT; + else + p->flags &= ~BR_MRP_LOST_CONT; + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + +out: + return err; +} + +int br_mrp_in_port_open(struct net_device *dev, u8 loc) +{ + struct net_bridge_port *p; + int err = 0; + + p = br_port_get_rcu(dev); + if (!p) { + err = -EINVAL; + goto out; + } + + if (loc) + p->flags |= BR_MRP_LOST_IN_CONT; + else + p->flags &= ~BR_MRP_LOST_IN_CONT; + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + +out: + return err; +} diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c new file mode 100644 index 000000000..75a7e8d0a --- /dev/null +++ b/net/bridge/br_mrp_switchdev.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <net/switchdev.h> + +#include "br_private_mrp.h" + +int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp) +{ + struct switchdev_obj_mrp mrp_obj = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_MRP, + .p_port = rtnl_dereference(mrp->p_port)->dev, + .s_port = rtnl_dereference(mrp->s_port)->dev, + .ring_id = mrp->ring_id, + .prio = mrp->prio, + }; + int err; + + err = switchdev_port_obj_add(br->dev, &mrp_obj.obj, NULL); + + if (err && err != -EOPNOTSUPP) + return err; + 
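+ /* -EOPNOTSUPP from the underlying driver is not treated as an error: the MRP instance is kept and keeps running without hardware offload. */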
+ return 0; +} + +int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp) +{ + struct switchdev_obj_mrp mrp_obj = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_MRP, + .p_port = NULL, + .s_port = NULL, + .ring_id = mrp->ring_id, + }; + int err; + + err = switchdev_port_obj_del(br->dev, &mrp_obj.obj); + + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +int br_mrp_switchdev_set_ring_role(struct net_bridge *br, + struct br_mrp *mrp, + enum br_mrp_ring_role_type role) +{ + struct switchdev_obj_ring_role_mrp mrp_role = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_RING_ROLE_MRP, + .ring_role = role, + .ring_id = mrp->ring_id, + }; + int err; + + if (role == BR_MRP_RING_ROLE_DISABLED) + err = switchdev_port_obj_del(br->dev, &mrp_role.obj); + else + err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL); + + return err; +} + +int br_mrp_switchdev_send_ring_test(struct net_bridge *br, + struct br_mrp *mrp, u32 interval, + u8 max_miss, u32 period, + bool monitor) +{ + struct switchdev_obj_ring_test_mrp test = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_RING_TEST_MRP, + .interval = interval, + .max_miss = max_miss, + .ring_id = mrp->ring_id, + .period = period, + .monitor = monitor, + }; + int err; + + if (interval == 0) + err = switchdev_port_obj_del(br->dev, &test.obj); + else + err = switchdev_port_obj_add(br->dev, &test.obj, NULL); + + return err; +} + +int br_mrp_switchdev_set_ring_state(struct net_bridge *br, + struct br_mrp *mrp, + enum br_mrp_ring_state_type state) +{ + struct switchdev_obj_ring_state_mrp mrp_state = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_RING_STATE_MRP, + .ring_state = state, + .ring_id = mrp->ring_id, + }; + int err; + + err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL); + + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, + u16 in_id, u32 ring_id, + enum br_mrp_in_role_type role) +{ + struct switchdev_obj_in_role_mrp mrp_role = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_ROLE_MRP, + .in_role = role, + .in_id = mrp->in_id, + .ring_id = mrp->ring_id, + .i_port = rtnl_dereference(mrp->i_port)->dev, + }; + int err; + + if (role == BR_MRP_IN_ROLE_DISABLED) + err = switchdev_port_obj_del(br->dev, &mrp_role.obj); + else + err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL); + + return err; +} + +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_in_state_type state) +{ + struct switchdev_obj_in_state_mrp mrp_state = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_STATE_MRP, + .in_state = state, + .in_id = mrp->in_id, + }; + int err; + + err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL); + + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period) +{ + struct switchdev_obj_in_test_mrp test = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_TEST_MRP, + .interval = interval, + .max_miss = max_miss, + .in_id = mrp->in_id, + .period = period, + }; + int err; + + if (interval == 0) + err = switchdev_port_obj_del(br->dev, &test.obj); + else + err = switchdev_port_obj_add(br->dev, &test.obj, NULL); + + return err; +} + +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = 
SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .u.stp_state = state, + }; + int err; + + err = switchdev_port_attr_set(p->dev, &attr); + if (err && err != -EOPNOTSUPP) + br_warn(p->br, "error setting offload MRP state on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + + return err; +} + +int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, + enum br_mrp_port_role_type role) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + .u.mrp_port_role = role, + }; + int err; + + err = switchdev_port_attr_set(p->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c new file mode 100644 index 000000000..e5328a277 --- /dev/null +++ b/net/bridge/br_multicast.c @@ -0,0 +1,3933 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Bridge multicast support. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <linux/err.h> +#include <linux/export.h> +#include <linux/if_ether.h> +#include <linux/igmp.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/netdevice.h> +#include <linux/netfilter_bridge.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/inetdevice.h> +#include <linux/mroute.h> +#include <net/ip.h> +#include <net/switchdev.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/ip6_checksum.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" + +static const struct rhashtable_params br_mdb_rht_params = { + .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode), + .key_offset = offsetof(struct net_bridge_mdb_entry, addr), + .key_len = sizeof(struct br_ip), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params br_sg_port_rht_params = { + .head_offset = offsetof(struct net_bridge_port_group, rhnode), + .key_offset = offsetof(struct net_bridge_port_group, key), + .key_len = sizeof(struct net_bridge_port_group_sg_key), + .automatic_shrinking = true, +}; + +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_own_query *query); +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port); +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid, + const unsigned char *src); +static void br_multicast_port_group_rexmit(struct timer_list *t); + +static void __del_port_router(struct net_bridge_port *p); +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid, const unsigned char *src); +#endif +static struct net_bridge_port_group * +__br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + const unsigned char *src, + u8 filter_mode, + bool igmpv2_mldv1, + bool blocked); +static void br_multicast_find_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg); + +static struct net_bridge_port_group * +br_sg_port_find(struct net_bridge *br, + struct net_bridge_port_group_sg_key *sg_p) +{ + lockdep_assert_held_once(&br->multicast_lock); + + return rhashtable_lookup_fast(&br->sg_port_tbl, sg_p, + br_sg_port_rht_params); +} + +static struct net_bridge_mdb_entry 
*br_mdb_ip_get_rcu(struct net_bridge *br, + struct br_ip *dst) +{ + return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params); +} + +struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br, + struct br_ip *dst) +{ + struct net_bridge_mdb_entry *ent; + + lockdep_assert_held_once(&br->multicast_lock); + + rcu_read_lock(); + ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params); + rcu_read_unlock(); + + return ent; +} + +static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br, + __be32 dst, __u16 vid) +{ + struct br_ip br_dst; + + memset(&br_dst, 0, sizeof(br_dst)); + br_dst.dst.ip4 = dst; + br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; + + return br_mdb_ip_get(br, &br_dst); +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br, + const struct in6_addr *dst, + __u16 vid) +{ + struct br_ip br_dst; + + memset(&br_dst, 0, sizeof(br_dst)); + br_dst.dst.ip6 = *dst; + br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; + + return br_mdb_ip_get(br, &br_dst); +} +#endif + +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid) +{ + struct br_ip ip; + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return NULL; + + if (BR_INPUT_SKB_CB(skb)->igmp) + return NULL; + + memset(&ip, 0, sizeof(ip)); + ip.proto = skb->protocol; + ip.vid = vid; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ip.dst.ip4 = ip_hdr(skb)->daddr; + if (br->multicast_igmp_version == 3) { + struct net_bridge_mdb_entry *mdb; + + ip.src.ip4 = ip_hdr(skb)->saddr; + mdb = br_mdb_ip_get_rcu(br, &ip); + if (mdb) + return mdb; + ip.src.ip4 = 0; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ip.dst.ip6 = ipv6_hdr(skb)->daddr; + if (br->multicast_mld_version == 2) { + struct net_bridge_mdb_entry *mdb; + + ip.src.ip6 = ipv6_hdr(skb)->saddr; + mdb = br_mdb_ip_get_rcu(br, &ip); + if (mdb) + return mdb; + memset(&ip.src.ip6, 0, sizeof(ip.src.ip6)); + } + break; +#endif + default: + return NULL; + } + + return br_mdb_ip_get_rcu(br, &ip); +} + +static bool br_port_group_equal(struct net_bridge_port_group *p, + struct net_bridge_port *port, + const unsigned char *src) +{ + if (p->key.port != port) + return false; + + if (!(port->flags & BR_MULTICAST_TO_UNICAST)) + return true; + + return ether_addr_equal(src, p->eth_addr); +} + +static void __fwd_add_star_excl(struct net_bridge_port_group *pg, + struct br_ip *sg_ip) +{ + struct net_bridge_port_group_sg_key sg_key; + struct net_bridge *br = pg->key.port->br; + struct net_bridge_port_group *src_pg; + + memset(&sg_key, 0, sizeof(sg_key)); + sg_key.port = pg->key.port; + sg_key.addr = *sg_ip; + if (br_sg_port_find(br, &sg_key)) + return; + + src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr, + MCAST_INCLUDE, false, false); + if (IS_ERR_OR_NULL(src_pg) || + src_pg->rt_protocol != RTPROT_KERNEL) + return; + + src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL; +} + +static void __fwd_del_star_excl(struct net_bridge_port_group *pg, + struct br_ip *sg_ip) +{ + struct net_bridge_port_group_sg_key sg_key; + struct net_bridge *br = pg->key.port->br; + struct net_bridge_port_group *src_pg; + + memset(&sg_key, 0, sizeof(sg_key)); + sg_key.port = pg->key.port; + sg_key.addr = *sg_ip; + src_pg = br_sg_port_find(br, &sg_key); + if (!src_pg || !(src_pg->flags & MDB_PG_FLAGS_STAR_EXCL) || + src_pg->rt_protocol != RTPROT_KERNEL) + return; + + br_multicast_find_del_pg(br, src_pg); +} + +/* When a port group transitions to (or is 
added as) EXCLUDE we need to add it + * to all other ports' S,G entries which are not blocked by the current group + * for proper replication, the assumption is that any S,G blocked entries + * are already added so the S,G,port lookup should skip them. + * When a port group transitions from EXCLUDE -> INCLUDE mode or is being + * deleted we need to remove it from all ports' S,G entries where it was + * automatically installed before (i.e. where it's MDB_PG_FLAGS_STAR_EXCL). + */ +void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, + u8 filter_mode) +{ + struct net_bridge *br = pg->key.port->br; + struct net_bridge_port_group *pg_lst; + struct net_bridge_mdb_entry *mp; + struct br_ip sg_ip; + + if (WARN_ON(!br_multicast_is_star_g(&pg->key.addr))) + return; + + mp = br_mdb_ip_get(br, &pg->key.addr); + if (!mp) + return; + + memset(&sg_ip, 0, sizeof(sg_ip)); + sg_ip = pg->key.addr; + for (pg_lst = mlock_dereference(mp->ports, br); + pg_lst; + pg_lst = mlock_dereference(pg_lst->next, br)) { + struct net_bridge_group_src *src_ent; + + if (pg_lst == pg) + continue; + hlist_for_each_entry(src_ent, &pg_lst->src_list, node) { + if (!(src_ent->flags & BR_SGRP_F_INSTALLED)) + continue; + sg_ip.src = src_ent->addr.src; + switch (filter_mode) { + case MCAST_INCLUDE: + __fwd_del_star_excl(pg, &sg_ip); + break; + case MCAST_EXCLUDE: + __fwd_add_star_excl(pg, &sg_ip); + break; + } + } + } +} + +/* called when adding a new S,G with host_joined == false by default */ +static void br_multicast_sg_host_state(struct net_bridge_mdb_entry *star_mp, + struct net_bridge_port_group *sg) +{ + struct net_bridge_mdb_entry *sg_mp; + + if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) + return; + if (!star_mp->host_joined) + return; + + sg_mp = br_mdb_ip_get(star_mp->br, &sg->key.addr); + if (!sg_mp) + return; + sg_mp->host_joined = true; +} + +/* set the host_joined state of all of *,G's S,G entries */ +static void br_multicast_star_g_host_state(struct net_bridge_mdb_entry *star_mp) +{ + struct net_bridge *br = star_mp->br; + struct net_bridge_mdb_entry *sg_mp; + struct net_bridge_port_group *pg; + struct br_ip sg_ip; + + if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) + return; + + memset(&sg_ip, 0, sizeof(sg_ip)); + sg_ip = star_mp->addr; + for (pg = mlock_dereference(star_mp->ports, br); + pg; + pg = mlock_dereference(pg->next, br)) { + struct net_bridge_group_src *src_ent; + + hlist_for_each_entry(src_ent, &pg->src_list, node) { + if (!(src_ent->flags & BR_SGRP_F_INSTALLED)) + continue; + sg_ip.src = src_ent->addr.src; + sg_mp = br_mdb_ip_get(br, &sg_ip); + if (!sg_mp) + continue; + sg_mp->host_joined = star_mp->host_joined; + } + } +} + +static void br_multicast_sg_del_exclude_ports(struct net_bridge_mdb_entry *sgmp) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + + /* *,G exclude ports are only added to S,G entries */ + if (WARN_ON(br_multicast_is_star_g(&sgmp->addr))) + return; + + /* we need the STAR_EXCLUDE ports if there are non-STAR_EXCLUDE ports + * we should ignore perm entries since they're managed by user-space + */ + for (pp = &sgmp->ports; + (p = mlock_dereference(*pp, sgmp->br)) != NULL; + pp = &p->next) + if (!(p->flags & (MDB_PG_FLAGS_STAR_EXCL | + MDB_PG_FLAGS_PERMANENT))) + return; + + /* currently the host can only have joined the *,G which means + * we treat it as EXCLUDE {}, so for an S,G it's considered a + * STAR_EXCLUDE entry and we can safely leave it + */ + sgmp->host_joined = false; + + for (pp = &sgmp->ports; + (p = 
mlock_dereference(*pp, sgmp->br)) != NULL;) { + if (!(p->flags & MDB_PG_FLAGS_PERMANENT)) + br_multicast_del_pg(sgmp, p, pp); + else + pp = &p->next; + } +} + +void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, + struct net_bridge_port_group *sg) +{ + struct net_bridge_port_group_sg_key sg_key; + struct net_bridge *br = star_mp->br; + struct net_bridge_port_group *pg; + + if (WARN_ON(br_multicast_is_star_g(&sg->key.addr))) + return; + if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr))) + return; + + br_multicast_sg_host_state(star_mp, sg); + memset(&sg_key, 0, sizeof(sg_key)); + sg_key.addr = sg->key.addr; + /* we need to add all exclude ports to the S,G */ + for (pg = mlock_dereference(star_mp->ports, br); + pg; + pg = mlock_dereference(pg->next, br)) { + struct net_bridge_port_group *src_pg; + + if (pg == sg || pg->filter_mode == MCAST_INCLUDE) + continue; + + sg_key.port = pg->key.port; + if (br_sg_port_find(br, &sg_key)) + continue; + + src_pg = __br_multicast_add_group(br, pg->key.port, + &sg->key.addr, + sg->eth_addr, + MCAST_INCLUDE, false, false); + if (IS_ERR_OR_NULL(src_pg) || + src_pg->rt_protocol != RTPROT_KERNEL) + continue; + src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL; + } +} + +static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) +{ + struct net_bridge_mdb_entry *star_mp; + struct net_bridge_port_group *sg; + struct br_ip sg_ip; + + if (src->flags & BR_SGRP_F_INSTALLED) + return; + + memset(&sg_ip, 0, sizeof(sg_ip)); + sg_ip = src->pg->key.addr; + sg_ip.src = src->addr.src; + sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip, + src->pg->eth_addr, MCAST_INCLUDE, false, + !timer_pending(&src->timer)); + if (IS_ERR_OR_NULL(sg)) + return; + src->flags |= BR_SGRP_F_INSTALLED; + sg->flags &= ~MDB_PG_FLAGS_STAR_EXCL; + + /* if it was added by user-space as perm we can skip next steps */ + if (sg->rt_protocol != RTPROT_KERNEL && + (sg->flags & MDB_PG_FLAGS_PERMANENT)) + return; + + /* the kernel is now responsible for removing this S,G */ + del_timer(&sg->timer); + star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr); + if (!star_mp) + return; + + br_multicast_sg_add_exclude_ports(star_mp, sg); +} + +static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) +{ + struct net_bridge_port_group *p, *pg = src->pg; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_entry *mp; + struct br_ip sg_ip; + + memset(&sg_ip, 0, sizeof(sg_ip)); + sg_ip = pg->key.addr; + sg_ip.src = src->addr.src; + + mp = br_mdb_ip_get(src->br, &sg_ip); + if (!mp) + return; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, src->br)) != NULL; + pp = &p->next) { + if (!br_port_group_equal(p, pg->key.port, pg->eth_addr)) + continue; + + if (p->rt_protocol != RTPROT_KERNEL && + (p->flags & MDB_PG_FLAGS_PERMANENT)) + break; + + br_multicast_del_pg(mp, p, pp); + break; + } + src->flags &= ~BR_SGRP_F_INSTALLED; +} + +/* install S,G and based on src's timer enable or disable forwarding */ +static void br_multicast_fwd_src_handle(struct net_bridge_group_src *src) +{ + struct net_bridge_port_group_sg_key sg_key; + struct net_bridge_port_group *sg; + u8 old_flags; + + br_multicast_fwd_src_add(src); + + memset(&sg_key, 0, sizeof(sg_key)); + sg_key.addr = src->pg->key.addr; + sg_key.addr.src = src->addr.src; + sg_key.port = src->pg->key.port; + + sg = br_sg_port_find(src->br, &sg_key); + if (!sg || (sg->flags & MDB_PG_FLAGS_PERMANENT)) + return; + + old_flags = sg->flags; + if (timer_pending(&src->timer)) + sg->flags &= 
~MDB_PG_FLAGS_BLOCKED; + else + sg->flags |= MDB_PG_FLAGS_BLOCKED; + + if (old_flags != sg->flags) { + struct net_bridge_mdb_entry *sg_mp; + + sg_mp = br_mdb_ip_get(src->br, &sg_key.addr); + if (!sg_mp) + return; + br_mdb_notify(src->br->dev, sg_mp, sg, RTM_NEWMDB); + } +} + +static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_mdb_entry *mp; + + mp = container_of(gc, struct net_bridge_mdb_entry, mcast_gc); + WARN_ON(!hlist_unhashed(&mp->mdb_node)); + WARN_ON(mp->ports); + + del_timer_sync(&mp->timer); + kfree_rcu(mp, rcu); +} + +static void br_multicast_del_mdb_entry(struct net_bridge_mdb_entry *mp) +{ + struct net_bridge *br = mp->br; + + rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode, + br_mdb_rht_params); + hlist_del_init_rcu(&mp->mdb_node); + hlist_add_head(&mp->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); +} + +static void br_multicast_group_expired(struct timer_list *t) +{ + struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer); + struct net_bridge *br = mp->br; + + spin_lock(&br->multicast_lock); + if (hlist_unhashed(&mp->mdb_node) || !netif_running(br->dev) || + timer_pending(&mp->timer)) + goto out; + + br_multicast_host_leave(mp, true); + + if (mp->ports) + goto out; + br_multicast_del_mdb_entry(mp); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_src *src; + + src = container_of(gc, struct net_bridge_group_src, mcast_gc); + WARN_ON(!hlist_unhashed(&src->node)); + + del_timer_sync(&src->timer); + kfree_rcu(src, rcu); +} + +static void br_multicast_del_group_src(struct net_bridge_group_src *src) +{ + struct net_bridge *br = src->pg->key.port->br; + + br_multicast_fwd_src_remove(src); + hlist_del_init_rcu(&src->node); + src->pg->src_ents--; + hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); +} + +static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_port_group *pg; + + pg = container_of(gc, struct net_bridge_port_group, mcast_gc); + WARN_ON(!hlist_unhashed(&pg->mglist)); + WARN_ON(!hlist_empty(&pg->src_list)); + + del_timer_sync(&pg->rexmit_timer); + del_timer_sync(&pg->timer); + kfree_rcu(pg, rcu); +} + +void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + struct net_bridge_port_group __rcu **pp) +{ + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_src *ent; + struct hlist_node *tmp; + + rcu_assign_pointer(*pp, pg->next); + hlist_del_init(&pg->mglist); + hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) + br_multicast_del_group_src(ent); + br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); + if (!br_multicast_is_star_g(&mp->addr)) { + rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode, + br_sg_port_rht_params); + br_multicast_sg_del_exclude_ports(mp); + } else { + br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE); + } + hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); + + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); +} + +static void br_multicast_find_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + + mp = br_mdb_ip_get(br, &pg->key.addr); + if (WARN_ON(!mp)) + return; 
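+ /* Walk the MDB entry's port group list to find this pg and unlink it; falling off the end of the list without a match is a bug (WARN_ON below). */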
+ + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p != pg) + continue; + + br_multicast_del_pg(mp, pg, pp); + return; + } + + WARN_ON(1); +} + +static void br_multicast_port_group_expired(struct timer_list *t) +{ + struct net_bridge_port_group *pg = from_timer(pg, t, timer); + struct net_bridge_group_src *src_ent; + struct net_bridge *br = pg->key.port->br; + struct hlist_node *tmp; + bool changed; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&pg->timer) || + hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT) + goto out; + + changed = !!(pg->filter_mode == MCAST_EXCLUDE); + pg->filter_mode = MCAST_INCLUDE; + hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { + if (!timer_pending(&src_ent->timer)) { + br_multicast_del_group_src(src_ent); + changed = true; + } + } + + if (hlist_empty(&pg->src_list)) { + br_multicast_find_del_pg(br, pg); + } else if (changed) { + struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->key.addr); + + if (changed && br_multicast_is_star_g(&pg->key.addr)) + br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE); + + if (WARN_ON(!mp)) + goto out; + br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB); + } +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_gc(struct hlist_head *head) +{ + struct net_bridge_mcast_gc *gcent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(gcent, tmp, head, gc_node) { + hlist_del_init(&gcent->gc_node); + gcent->destroy(gcent); + } +} + +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + struct net_bridge_port_group *pg, + __be32 ip_dst, __be32 group, + bool with_srcs, bool over_lmqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) +{ + struct net_bridge_port *p = pg ? pg->key.port : NULL; + struct net_bridge_group_src *ent; + size_t pkt_size, igmp_hdr_size; + unsigned long now = jiffies; + struct igmpv3_query *ihv3; + void *csum_start = NULL; + __sum16 *csum = NULL; + struct sk_buff *skb; + struct igmphdr *ih; + struct ethhdr *eth; + unsigned long lmqt; + struct iphdr *iph; + u16 lmqt_srcs = 0; + + igmp_hdr_size = sizeof(*ih); + if (br->multicast_igmp_version == 3) { + igmp_hdr_size = sizeof(*ihv3); + if (pg && with_srcs) { + lmqt = now + (br->multicast_last_member_interval * + br->multicast_last_member_count); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_lmqt == time_after(ent->timer.expires, + lmqt) && + ent->src_query_rexmit_cnt > 0) + lmqt_srcs++; + } + + if (!lmqt_srcs) + return NULL; + igmp_hdr_size += lmqt_srcs * sizeof(__be32); + } + } + + pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size; + if ((p && pkt_size > p->dev->mtu) || + pkt_size > br->dev->mtu) + return NULL; + + skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IP); + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + ether_addr_copy(eth->h_source, br->dev->dev_addr); + ip_eth_mc_map(ip_dst, eth->h_dest); + eth->h_proto = htons(ETH_P_IP); + skb_put(skb, sizeof(*eth)); + + skb_set_network_header(skb, skb->len); + iph = ip_hdr(skb); + iph->tot_len = htons(pkt_size - sizeof(*eth)); + + iph->version = 4; + iph->ihl = 6; + iph->tos = 0xc0; + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = 1; + iph->protocol = IPPROTO_IGMP; + iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? 
+ inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; + iph->daddr = ip_dst; + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 *)&iph[1])[3] = 0; + ip_send_check(iph); + skb_put(skb, 24); + + skb_set_transport_header(skb, skb->len); + *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; + + switch (br->multicast_igmp_version) { + case 2: + ih = igmp_hdr(skb); + ih->type = IGMP_HOST_MEMBERSHIP_QUERY; + ih->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ih->group = group; + ih->csum = 0; + csum = &ih->csum; + csum_start = (void *)ih; + break; + case 3: + ihv3 = igmpv3_query_hdr(skb); + ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; + ihv3->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ihv3->group = group; + ihv3->qqic = br->multicast_query_interval / HZ; + ihv3->nsrcs = htons(lmqt_srcs); + ihv3->resv = 0; + ihv3->suppress = sflag; + ihv3->qrv = 2; + ihv3->csum = 0; + csum = &ihv3->csum; + csum_start = (void *)ihv3; + if (!pg || !with_srcs) + break; + + lmqt_srcs = 0; + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_lmqt == time_after(ent->timer.expires, + lmqt) && + ent->src_query_rexmit_cnt > 0) { + ihv3->srcs[lmqt_srcs++] = ent->addr.src.ip4; + ent->src_query_rexmit_cnt--; + if (need_rexmit && ent->src_query_rexmit_cnt) + *need_rexmit = true; + } + } + if (WARN_ON(lmqt_srcs != ntohs(ihv3->nsrcs))) { + kfree_skb(skb); + return NULL; + } + break; + } + + if (WARN_ON(!csum || !csum_start)) { + kfree_skb(skb); + return NULL; + } + + *csum = ip_compute_csum(csum_start, igmp_hdr_size); + skb_put(skb, igmp_hdr_size); + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, + struct net_bridge_port_group *pg, + const struct in6_addr *ip6_dst, + const struct in6_addr *group, + bool with_srcs, bool over_llqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) +{ + struct net_bridge_port *p = pg ? 
pg->key.port : NULL; + struct net_bridge_group_src *ent; + size_t pkt_size, mld_hdr_size; + unsigned long now = jiffies; + struct mld2_query *mld2q; + void *csum_start = NULL; + unsigned long interval; + __sum16 *csum = NULL; + struct ipv6hdr *ip6h; + struct mld_msg *mldq; + struct sk_buff *skb; + unsigned long llqt; + struct ethhdr *eth; + u16 llqt_srcs = 0; + u8 *hopopt; + + mld_hdr_size = sizeof(*mldq); + if (br->multicast_mld_version == 2) { + mld_hdr_size = sizeof(*mld2q); + if (pg && with_srcs) { + llqt = now + (br->multicast_last_member_interval * + br->multicast_last_member_count); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_llqt == time_after(ent->timer.expires, + llqt) && + ent->src_query_rexmit_cnt > 0) + llqt_srcs++; + } + + if (!llqt_srcs) + return NULL; + mld_hdr_size += llqt_srcs * sizeof(struct in6_addr); + } + } + + pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size; + if ((p && pkt_size > p->dev->mtu) || + pkt_size > br->dev->mtu) + return NULL; + + skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IPV6); + + /* Ethernet header */ + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + ether_addr_copy(eth->h_source, br->dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + skb_put(skb, sizeof(*eth)); + + /* IPv6 header + HbH option */ + skb_set_network_header(skb, skb->len); + ip6h = ipv6_hdr(skb); + + *(__force __be32 *)ip6h = htonl(0x60000000); + ip6h->payload_len = htons(8 + mld_hdr_size); + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ip6h->daddr = *ip6_dst; + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr)) { + kfree_skb(skb); + br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false); + return NULL; + } + + br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); + + hopopt = (u8 *)(ip6h + 1); + hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ + hopopt[1] = 0; /* length of HbH */ + hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ + hopopt[3] = 2; /* Length of RA Option */ + hopopt[4] = 0; /* Type = 0x0000 (MLD) */ + hopopt[5] = 0; + hopopt[6] = IPV6_TLV_PAD1; /* Pad1 */ + hopopt[7] = IPV6_TLV_PAD1; /* Pad1 */ + + skb_put(skb, sizeof(*ip6h) + 8); + + /* ICMPv6 */ + skb_set_transport_header(skb, skb->len); + interval = ipv6_addr_any(group) ? 
+ br->multicast_query_response_interval : + br->multicast_last_member_interval; + *igmp_type = ICMPV6_MGM_QUERY; + switch (br->multicast_mld_version) { + case 1: + mldq = (struct mld_msg *)icmp6_hdr(skb); + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + mldq->mld_mca = *group; + csum = &mldq->mld_cksum; + csum_start = (void *)mldq; + break; + case 2: + mld2q = (struct mld2_query *)icmp6_hdr(skb); + mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval)); + mld2q->mld2q_type = ICMPV6_MGM_QUERY; + mld2q->mld2q_code = 0; + mld2q->mld2q_cksum = 0; + mld2q->mld2q_resv1 = 0; + mld2q->mld2q_resv2 = 0; + mld2q->mld2q_suppress = sflag; + mld2q->mld2q_qrv = 2; + mld2q->mld2q_nsrcs = htons(llqt_srcs); + mld2q->mld2q_qqic = br->multicast_query_interval / HZ; + mld2q->mld2q_mca = *group; + csum = &mld2q->mld2q_cksum; + csum_start = (void *)mld2q; + if (!pg || !with_srcs) + break; + + llqt_srcs = 0; + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_llqt == time_after(ent->timer.expires, + llqt) && + ent->src_query_rexmit_cnt > 0) { + mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.src.ip6; + ent->src_query_rexmit_cnt--; + if (need_rexmit && ent->src_query_rexmit_cnt) + *need_rexmit = true; + } + } + if (WARN_ON(llqt_srcs != ntohs(mld2q->mld2q_nsrcs))) { + kfree_skb(skb); + return NULL; + } + break; + } + + if (WARN_ON(!csum || !csum_start)) { + kfree_skb(skb); + return NULL; + } + + *csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, mld_hdr_size, + IPPROTO_ICMPV6, + csum_partial(csum_start, mld_hdr_size, 0)); + skb_put(skb, mld_hdr_size); + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} +#endif + +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + struct net_bridge_port_group *pg, + struct br_ip *ip_dst, + struct br_ip *group, + bool with_srcs, bool over_lmqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) +{ + __be32 ip4_dst; + + switch (group->proto) { + case htons(ETH_P_IP): + ip4_dst = ip_dst ? 
ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP); + return br_ip4_multicast_alloc_query(br, pg, + ip4_dst, group->dst.ip4, + with_srcs, over_lmqt, + sflag, igmp_type, + need_rexmit); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): { + struct in6_addr ip6_dst; + + if (ip_dst) + ip6_dst = ip_dst->dst.ip6; + else + ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0, + htonl(1)); + + return br_ip6_multicast_alloc_query(br, pg, + &ip6_dst, &group->dst.ip6, + with_srcs, over_lmqt, + sflag, igmp_type, + need_rexmit); + } +#endif + } + return NULL; +} + +struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + int err; + + mp = br_mdb_ip_get(br, group); + if (mp) + return mp; + + if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) { + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false); + return ERR_PTR(-E2BIG); + } + + mp = kzalloc(sizeof(*mp), GFP_ATOMIC); + if (unlikely(!mp)) + return ERR_PTR(-ENOMEM); + + mp->br = br; + mp->addr = *group; + mp->mcast_gc.destroy = br_multicast_destroy_mdb_entry; + timer_setup(&mp->timer, br_multicast_group_expired, 0); + err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode, + br_mdb_rht_params); + if (err) { + kfree(mp); + mp = ERR_PTR(err); + } else { + hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list); + } + + return mp; +} + +static void br_multicast_group_src_expired(struct timer_list *t) +{ + struct net_bridge_group_src *src = from_timer(src, t, timer); + struct net_bridge_port_group *pg; + struct net_bridge *br = src->br; + + spin_lock(&br->multicast_lock); + if (hlist_unhashed(&src->node) || !netif_running(br->dev) || + timer_pending(&src->timer)) + goto out; + + pg = src->pg; + if (pg->filter_mode == MCAST_INCLUDE) { + br_multicast_del_group_src(src); + if (!hlist_empty(&pg->src_list)) + goto out; + br_multicast_find_del_pg(br, pg); + } else { + br_multicast_fwd_src_handle(src); + } + +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_group_src * +br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) +{ + struct net_bridge_group_src *ent; + + switch (ip->proto) { + case htons(ETH_P_IP): + hlist_for_each_entry(ent, &pg->src_list, node) + if (ip->src.ip4 == ent->addr.src.ip4) + return ent; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + hlist_for_each_entry(ent, &pg->src_list, node) + if (!ipv6_addr_cmp(&ent->addr.src.ip6, &ip->src.ip6)) + return ent; + break; +#endif + } + + return NULL; +} + +static struct net_bridge_group_src * +br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip) +{ + struct net_bridge_group_src *grp_src; + + if (unlikely(pg->src_ents >= PG_SRC_ENT_LIMIT)) + return NULL; + + switch (src_ip->proto) { + case htons(ETH_P_IP): + if (ipv4_is_zeronet(src_ip->src.ip4) || + ipv4_is_multicast(src_ip->src.ip4)) + return NULL; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (ipv6_addr_any(&src_ip->src.ip6) || + ipv6_addr_is_multicast(&src_ip->src.ip6)) + return NULL; + break; +#endif + } + + grp_src = kzalloc(sizeof(*grp_src), GFP_ATOMIC); + if (unlikely(!grp_src)) + return NULL; + + grp_src->pg = pg; + grp_src->br = pg->key.port->br; + grp_src->addr = *src_ip; + grp_src->mcast_gc.destroy = br_multicast_destroy_group_src; + timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0); + + hlist_add_head_rcu(&grp_src->node, &pg->src_list); + pg->src_ents++; + + return grp_src; +} + +struct net_bridge_port_group 
*br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group __rcu *next, + unsigned char flags, + const unsigned char *src, + u8 filter_mode, + u8 rt_protocol) +{ + struct net_bridge_port_group *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (unlikely(!p)) + return NULL; + + p->key.addr = *group; + p->key.port = port; + p->flags = flags; + p->filter_mode = filter_mode; + p->rt_protocol = rt_protocol; + p->mcast_gc.destroy = br_multicast_destroy_port_group; + INIT_HLIST_HEAD(&p->src_list); + + if (!br_multicast_is_star_g(group) && + rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode, + br_sg_port_rht_params)) { + kfree(p); + return NULL; + } + + rcu_assign_pointer(p->next, next); + timer_setup(&p->timer, br_multicast_port_group_expired, 0); + timer_setup(&p->rexmit_timer, br_multicast_port_group_rexmit, 0); + hlist_add_head(&p->mglist, &port->mglist); + + if (src) + memcpy(p->eth_addr, src, ETH_ALEN); + else + eth_broadcast_addr(p->eth_addr); + + return p; +} + +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) { + mp->host_joined = true; + if (br_multicast_is_star_g(&mp->addr)) + br_multicast_star_g_host_state(mp); + if (notify) + br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB); + } + mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); +} + +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) + return; + + mp->host_joined = false; + if (br_multicast_is_star_g(&mp->addr)) + br_multicast_star_g_host_state(mp); + if (notify) + br_mdb_notify(mp->br->dev, mp, NULL, RTM_DELMDB); +} + +static struct net_bridge_port_group * +__br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + const unsigned char *src, + u8 filter_mode, + bool igmpv2_mldv1, + bool blocked) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p = NULL; + struct net_bridge_mdb_entry *mp; + unsigned long now = jiffies; + + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + mp = br_multicast_new_group(br, group); + if (IS_ERR(mp)) + return ERR_PTR(PTR_ERR(mp)); + + if (!port) { + br_multicast_host_join(mp, true); + goto out; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (br_port_group_equal(p, port, src)) + goto found; + if ((unsigned long)p->key.port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode, + RTPROT_KERNEL); + if (unlikely(!p)) { + p = ERR_PTR(-ENOMEM); + goto out; + } + rcu_assign_pointer(*pp, p); + if (blocked) + p->flags |= MDB_PG_FLAGS_BLOCKED; + br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); + +found: + if (igmpv2_mldv1) + mod_timer(&p->timer, now + br->multicast_membership_interval); + +out: + return p; +} + +static int br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + const unsigned char *src, + u8 filter_mode, + bool igmpv2_mldv1) +{ + struct net_bridge_port_group *pg; + int err; + + spin_lock(&br->multicast_lock); + pg = __br_multicast_add_group(br, port, group, src, filter_mode, + igmpv2_mldv1, false); + /* NULL is considered valid for host joined groups */ + err = IS_ERR(pg) ? 
PTR_ERR(pg) : 0; + spin_unlock(&br->multicast_lock); + + return err; +} + +static int br_ip4_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid, + const unsigned char *src, + bool igmpv2) +{ + struct br_ip br_group; + u8 filter_mode; + + if (ipv4_is_local_multicast(group)) + return 0; + + memset(&br_group, 0, sizeof(br_group)); + br_group.dst.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; + filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE; + + return br_multicast_add_group(br, port, &br_group, src, filter_mode, + igmpv2); +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid, + const unsigned char *src, + bool mldv1) +{ + struct br_ip br_group; + u8 filter_mode; + + if (ipv6_addr_is_ll_all_nodes(group)) + return 0; + + memset(&br_group, 0, sizeof(br_group)); + br_group.dst.ip6 = *group; + br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; + filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE; + + return br_multicast_add_group(br, port, &br_group, src, filter_mode, + mldv1); +} +#endif + +static void br_multicast_router_expired(struct timer_list *t) +{ + struct net_bridge_port *port = + from_timer(port, t, multicast_router_timer); + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->multicast_router == MDB_RTR_TYPE_DISABLED || + port->multicast_router == MDB_RTR_TYPE_PERM || + timer_pending(&port->multicast_router_timer)) + goto out; + + __del_port_router(port); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_mc_router_state_change(struct net_bridge *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + +static void br_multicast_local_router_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, multicast_router_timer); + + spin_lock(&br->multicast_lock); + if (br->multicast_router == MDB_RTR_TYPE_DISABLED || + br->multicast_router == MDB_RTR_TYPE_PERM || + timer_pending(&br->multicast_router_timer)) + goto out; + + br_mc_router_state_change(br, false); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_querier_expired(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + goto out; + + br_multicast_start_querier(br, query); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_querier_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, ip4_other_query.timer); + + br_multicast_querier_expired(br, &br->ip4_own_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_querier_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, ip6_other_query.timer); + + br_multicast_querier_expired(br, &br->ip6_own_query); +} +#endif + +static void br_multicast_select_own_querier(struct net_bridge *br, + struct br_ip *ip, + struct sk_buff *skb) +{ + if (ip->proto == htons(ETH_P_IP)) + br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + else + br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; +#endif +} + +static void __br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + 
struct net_bridge_port_group *pg, + struct br_ip *ip_dst, + struct br_ip *group, + bool with_srcs, + u8 sflag, + bool *need_rexmit) +{ + bool over_lmqt = !!sflag; + struct sk_buff *skb; + u8 igmp_type; + +again_under_lmqt: + skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs, + over_lmqt, sflag, &igmp_type, + need_rexmit); + if (!skb) + return; + + if (port) { + skb->dev = port->dev; + br_multicast_count(br, port, skb, igmp_type, + BR_MCAST_DIR_TX); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(port->dev), NULL, skb, NULL, skb->dev, + br_dev_queue_push_xmit); + + if (over_lmqt && with_srcs && sflag) { + over_lmqt = false; + goto again_under_lmqt; + } + } else { + br_multicast_select_own_querier(br, group, skb); + br_multicast_count(br, port, skb, igmp_type, + BR_MCAST_DIR_RX); + netif_rx(skb); + } +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct bridge_mcast_own_query *own_query) +{ + struct bridge_mcast_other_query *other_query = NULL; + struct br_ip br_group; + unsigned long time; + + if (!netif_running(br->dev) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED) || + !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + return; + + memset(&br_group.dst, 0, sizeof(br_group.dst)); + + if (port ? (own_query == &port->ip4_own_query) : + (own_query == &br->ip4_own_query)) { + other_query = &br->ip4_other_query; + br_group.proto = htons(ETH_P_IP); +#if IS_ENABLED(CONFIG_IPV6) + } else { + other_query = &br->ip6_other_query; + br_group.proto = htons(ETH_P_IPV6); +#endif + } + + if (!other_query || timer_pending(&other_query->timer)) + return; + + __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0, + NULL); + + time = jiffies; + time += own_query->startup_sent < br->multicast_startup_query_count ? 
+ br->multicast_startup_query_interval : + br->multicast_query_interval; + mod_timer(&own_query->timer, time); +} + +static void +br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_own_query *query) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + goto out; + + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + br_multicast_send_query(port->br, port, query); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_port_query_expired(struct timer_list *t) +{ + struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer); + + br_multicast_port_query_expired(port, &port->ip4_own_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_port_query_expired(struct timer_list *t) +{ + struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer); + + br_multicast_port_query_expired(port, &port->ip6_own_query); +} +#endif + +static void br_multicast_port_group_rexmit(struct timer_list *t) +{ + struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer); + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->key.port->br; + bool need_rexmit = false; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED) || + !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + goto out; + + if (pg->key.addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + if (!other_query || timer_pending(&other_query->timer)) + goto out; + + if (pg->grp_query_rexmit_cnt) { + pg->grp_query_rexmit_cnt--; + __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + &pg->key.addr, false, 1, NULL); + } + __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + &pg->key.addr, true, 0, &need_rexmit); + + if (pg->grp_query_rexmit_cnt || need_rexmit) + mod_timer(&pg->rexmit_timer, jiffies + + br->multicast_last_member_interval); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_mc_disabled_update(struct net_device *dev, bool value) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + .flags = SWITCHDEV_F_DEFER, + .u.mc_disabled = !value, + }; + + switchdev_port_attr_set(dev, &attr); +} + +int br_multicast_add_port(struct net_bridge_port *port) +{ + port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + + timer_setup(&port->multicast_router_timer, + br_multicast_router_expired, 0); + timer_setup(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, 0); +#if IS_ENABLED(CONFIG_IPV6) + timer_setup(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, 0); +#endif + br_mc_disabled_update(port->dev, + br_opt_get(port->br, BROPT_MULTICAST_ENABLED)); + + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!port->mcast_stats) + return -ENOMEM; + + return 0; +} + +void br_multicast_del_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + struct net_bridge_port_group *pg; + HLIST_HEAD(deleted_head); + struct hlist_node *n; + + /* Take care of the remaining groups, only perm ones should be left */ + spin_lock_bh(&br->multicast_lock); + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) + br_multicast_find_del_pg(br, pg); + hlist_move_list(&br->mcast_gc_list, 
&deleted_head); + spin_unlock_bh(&br->multicast_lock); + br_multicast_gc(&deleted_head); + del_timer_sync(&port->multicast_router_timer); + free_percpu(port->mcast_stats); +} + +static void br_multicast_enable(struct bridge_mcast_own_query *query) +{ + query->startup_sent = 0; + + if (try_to_del_timer_sync(&query->timer) >= 0 || + del_timer(&query->timer)) + mod_timer(&query->timer, jiffies); +} + +static void __br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev)) + return; + + br_multicast_enable(&port->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_enable(&port->ip6_own_query); +#endif + if (port->multicast_router == MDB_RTR_TYPE_PERM && + hlist_unhashed(&port->rlist)) + br_multicast_add_router(br, port); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + __br_multicast_enable_port(port); + spin_unlock(&br->multicast_lock); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + struct net_bridge_port_group *pg; + struct hlist_node *n; + + spin_lock(&br->multicast_lock); + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) + if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) + br_multicast_find_del_pg(br, pg); + + __del_port_router(port); + + del_timer(&port->multicast_router_timer); + del_timer(&port->ip4_own_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer(&port->ip6_own_query.timer); +#endif + spin_unlock(&br->multicast_lock); +} + +static int __grp_src_delete_marked(struct net_bridge_port_group *pg) +{ + struct net_bridge_group_src *ent; + struct hlist_node *tmp; + int deleted = 0; + + hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) + if (ent->flags & BR_SGRP_F_DELETE) { + br_multicast_del_group_src(ent); + deleted++; + } + + return deleted; +} + +static void __grp_src_mod_timer(struct net_bridge_group_src *src, + unsigned long expires) +{ + mod_timer(&src->timer, expires); + br_multicast_fwd_src_handle(src); +} + +static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) +{ + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->key.port->br; + u32 lmqc = br->multicast_last_member_count; + unsigned long lmqt, lmi, now = jiffies; + struct net_bridge_group_src *ent; + + if (!netif_running(br->dev) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return; + + if (pg->key.addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + lmqt = now + br_multicast_lmqt(br); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (ent->flags & BR_SGRP_F_SEND) { + ent->flags &= ~BR_SGRP_F_SEND; + if (ent->timer.expires > lmqt) { + if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + other_query && + !timer_pending(&other_query->timer)) + ent->src_query_rexmit_cnt = lmqc; + __grp_src_mod_timer(ent, lmqt); + } + } + } + + if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) || + !other_query || timer_pending(&other_query->timer)) + return; + + __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + &pg->key.addr, true, 1, NULL); + + lmi = now + br->multicast_last_member_interval; + if (!timer_pending(&pg->rexmit_timer) || + time_after(pg->rexmit_timer.expires, lmi)) + mod_timer(&pg->rexmit_timer, lmi); +} + +static void __grp_send_query_and_rexmit(struct net_bridge_port_group 
*pg) +{ + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->key.port->br; + unsigned long now = jiffies, lmi; + + if (!netif_running(br->dev) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return; + + if (pg->key.addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + other_query && !timer_pending(&other_query->timer)) { + lmi = now + br->multicast_last_member_interval; + pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1; + __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + &pg->key.addr, false, 0, NULL); + if (!timer_pending(&pg->rexmit_timer) || + time_after(pg->rexmit_timer.expires, lmi)) + mod_timer(&pg->rexmit_timer, lmi); + } + + if (pg->filter_mode == MCAST_EXCLUDE && + (!timer_pending(&pg->timer) || + time_after(pg->timer.expires, now + br_multicast_lmqt(br)))) + mod_timer(&pg->timer, now + br_multicast_lmqt(br)); +} + +/* State Msg type New state Actions + * INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI + * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI + * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI + */ +static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (!ent) { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + + if (ent) + __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + srcs += src_size; + } + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + * Delete (A-B) + * Group Timer=GMI + */ +static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + struct br_ip src_ip; + u32 src_idx; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) + ent->flags &= ~BR_SGRP_F_DELETE; + else + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + br_multicast_fwd_src_handle(ent); + srcs += src_size; + } + + __grp_src_delete_marked(pg); +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI + * Delete (X-A) + * Delete (Y-A) + * Group Timer=GMI + */ +static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + 
ent->flags &= ~BR_SGRP_F_DELETE; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + __grp_src_mod_timer(ent, + now + br_multicast_gmi(br)); + changed = true; + } + } + srcs += src_size; + } + + if (__grp_src_delete_marked(pg)) + changed = true; + + return changed; +} + +static bool br_multicast_isexc(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_isexc_incl(pg, srcs, nsrcs, src_size); + br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); + changed = true; + break; + case MCAST_EXCLUDE: + changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size); + break; + } + + pg->filter_mode = MCAST_EXCLUDE; + mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI + * Send Q(G,A-B) + */ +static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + u32 src_idx, to_send = pg->src_ents; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags &= ~BR_SGRP_F_SEND; + to_send--; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + if (ent) + __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) TO_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI + * Send Q(G,X-A) + * Send Q(G) + */ +static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + u32 src_idx, to_send = pg->src_ents; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + if (timer_pending(&ent->timer)) + ent->flags |= BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + if (timer_pending(&ent->timer)) { + ent->flags &= ~BR_SGRP_F_SEND; + to_send--; + } + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + if (ent) + __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + __grp_send_query_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_toin(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size); + break; + case MCAST_EXCLUDE: + changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size); + break; + } + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + 
* Delete (A-B) + * Send Q(G,A*B) + * Group Timer=GMI + */ +static void __grp_src_toex_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | + BR_SGRP_F_SEND; + to_send++; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + } + if (ent) + br_multicast_fwd_src_handle(ent); + srcs += src_size; + } + + __grp_src_delete_marked(pg); + if (to_send) + __grp_src_query_marked_and_rexmit(pg); +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer + * Delete (X-A) + * Delete (Y-A) + * Send Q(G,A-Y) + * Group Timer=GMI + */ +static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags &= ~BR_SGRP_F_DELETE; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + __grp_src_mod_timer(ent, pg->timer.expires); + changed = true; + } + } + if (ent && timer_pending(&ent->timer)) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (__grp_src_delete_marked(pg)) + changed = true; + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_toex(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->key.port->br; + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_toex_incl(pg, srcs, nsrcs, src_size); + br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); + changed = true; + break; + case MCAST_EXCLUDE: + changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size); + break; + } + + pg->filter_mode = MCAST_EXCLUDE; + mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) + */ +static void __grp_src_block_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags &= ~BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) + br_multicast_find_del_pg(pg->key.port->br, pg); +} + +/* State Msg type 
New state Actions + * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer + * Send Q(G,A-Y) + */ +static bool __grp_src_block_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags &= ~BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.src, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (!ent) { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + __grp_src_mod_timer(ent, pg->timer.expires); + changed = true; + } + } + if (ent && timer_pending(&ent->timer)) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_block(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_block_incl(pg, srcs, nsrcs, src_size); + break; + case MCAST_EXCLUDE: + changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size); + break; + } + + return changed; +} + +static struct net_bridge_port_group * +br_multicast_find_port(struct net_bridge_mdb_entry *mp, + struct net_bridge_port *p, + const unsigned char *src) +{ + struct net_bridge *br __maybe_unused = mp->br; + struct net_bridge_port_group *pg; + + for (pg = mlock_dereference(mp->ports, br); + pg; + pg = mlock_dereference(pg->next, br)) + if (br_port_group_equal(pg, p, src)) + return pg; + + return NULL; +} + +static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + bool igmpv2 = br->multicast_igmp_version == 2; + struct net_bridge_mdb_entry *mdst; + struct net_bridge_port_group *pg; + const unsigned char *src; + struct igmpv3_report *ih; + struct igmpv3_grec *grec; + int i, len, num, type; + bool changed = false; + __be32 group; + int err = 0; + u16 nsrcs; + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); + len = skb_transport_offset(skb) + sizeof(*ih); + + for (i = 0; i < num; i++) { + len += sizeof(*grec); + if (!ip_mc_may_pull(skb, len)) + return -EINVAL; + + grec = (void *)(skb->data + len - sizeof(*grec)); + group = grec->grec_mca; + type = grec->grec_type; + nsrcs = ntohs(grec->grec_nsrcs); + + len += nsrcs * 4; + if (!ip_mc_may_pull(skb, len)) + return -EINVAL; + + switch (type) { + case IGMPV3_MODE_IS_INCLUDE: + case IGMPV3_MODE_IS_EXCLUDE: + case IGMPV3_CHANGE_TO_INCLUDE: + case IGMPV3_CHANGE_TO_EXCLUDE: + case IGMPV3_ALLOW_NEW_SOURCES: + case IGMPV3_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + src = eth_hdr(skb)->h_source; + if (nsrcs == 0 && + (type == IGMPV3_CHANGE_TO_INCLUDE || + type == IGMPV3_MODE_IS_INCLUDE)) { + if (!port || igmpv2) { + br_ip4_multicast_leave_group(br, port, group, vid, src); + continue; + } + } else { + err = br_ip4_multicast_add_group(br, port, group, vid, + src, igmpv2); + if (err) + break; + } + + if (!port || igmpv2) + continue; + + spin_lock_bh(&br->multicast_lock); + mdst = br_mdb_ip4_get(br, group, vid); + if (!mdst) + goto unlock_continue; + pg = br_multicast_find_port(mdst, port, src); + if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) + goto unlock_continue; + /* reload grec */ + grec = (void *)(skb->data + len - 
sizeof(*grec) - (nsrcs * 4)); + switch (type) { + case IGMPV3_ALLOW_NEW_SOURCES: + changed = br_multicast_isinc_allow(pg, grec->grec_src, + nsrcs, sizeof(__be32)); + break; + case IGMPV3_MODE_IS_INCLUDE: + changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_MODE_IS_EXCLUDE: + changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_INCLUDE: + changed = br_multicast_toin(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + changed = br_multicast_toex(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + changed = br_multicast_block(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + } + if (changed) + br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); +unlock_continue: + spin_unlock_bh(&br->multicast_lock); + } + + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_mld2_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + bool mldv1 = br->multicast_mld_version == 1; + struct net_bridge_mdb_entry *mdst; + struct net_bridge_port_group *pg; + unsigned int nsrcs_offset; + const unsigned char *src; + struct icmp6hdr *icmp6h; + struct mld2_grec *grec; + unsigned int grec_len; + bool changed = false; + int i, len, num; + int err = 0; + + if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h))) + return -EINVAL; + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); + len = skb_transport_offset(skb) + sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *_nsrcs, __nsrcs; + u16 nsrcs; + + nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); + + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < + nsrcs_offset + sizeof(__nsrcs)) + return -EINVAL; + + _nsrcs = skb_header_pointer(skb, nsrcs_offset, + sizeof(__nsrcs), &__nsrcs); + if (!_nsrcs) + return -EINVAL; + + nsrcs = ntohs(*_nsrcs); + grec_len = struct_size(grec, grec_src, nsrcs); + + if (!ipv6_mc_may_pull(skb, len + grec_len)) + return -EINVAL; + + grec = (struct mld2_grec *)(skb->data + len); + len += grec_len; + + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + case MLD2_MODE_IS_EXCLUDE: + case MLD2_CHANGE_TO_INCLUDE: + case MLD2_CHANGE_TO_EXCLUDE: + case MLD2_ALLOW_NEW_SOURCES: + case MLD2_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + src = eth_hdr(skb)->h_source; + if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || + grec->grec_type == MLD2_MODE_IS_INCLUDE) && + nsrcs == 0) { + if (!port || mldv1) { + br_ip6_multicast_leave_group(br, port, + &grec->grec_mca, + vid, src); + continue; + } + } else { + err = br_ip6_multicast_add_group(br, port, + &grec->grec_mca, vid, + src, mldv1); + if (err) + break; + } + + if (!port || mldv1) + continue; + + spin_lock_bh(&br->multicast_lock); + mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid); + if (!mdst) + goto unlock_continue; + pg = br_multicast_find_port(mdst, port, src); + if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) + goto unlock_continue; + switch (grec->grec_type) { + case MLD2_ALLOW_NEW_SOURCES: + changed = br_multicast_isinc_allow(pg, grec->grec_src, + nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_MODE_IS_INCLUDE: + changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_MODE_IS_EXCLUDE: + changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_INCLUDE: + changed = 
br_multicast_toin(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_EXCLUDE: + changed = br_multicast_toex(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_BLOCK_OLD_SOURCES: + changed = br_multicast_block(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + } + if (changed) + br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); +unlock_continue: + spin_unlock_bh(&br->multicast_lock); + } + + return err; +} +#endif + +static bool br_ip4_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + __be32 saddr) +{ + if (!timer_pending(&br->ip4_own_query.timer) && + !timer_pending(&br->ip4_other_query.timer)) + goto update; + + if (!br->ip4_querier.addr.src.ip4) + goto update; + + if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4)) + goto update; + + return false; + +update: + br->ip4_querier.addr.src.ip4 = saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip4_querier.port, port); + + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool br_ip6_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct in6_addr *saddr) +{ + if (!timer_pending(&br->ip6_own_query.timer) && + !timer_pending(&br->ip6_other_query.timer)) + goto update; + + if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0) + goto update; + + return false; + +update: + br->ip6_querier.addr.src.ip6 = *saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip6_querier.port, port); + + return true; +} +#endif + +static bool br_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *saddr) +{ + switch (saddr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_select_querier(br, port, saddr->src.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_ip6_multicast_select_querier(br, port, &saddr->src.ip6); +#endif + } + + return false; +} + +static void +br_multicast_update_query_timer(struct net_bridge *br, + struct bridge_mcast_other_query *query, + unsigned long max_delay) +{ + if (!timer_pending(&query->timer)) + query->delay_time = jiffies + max_delay; + + mod_timer(&query->timer, jiffies + br->multicast_querier_interval); +} + +static void br_port_mc_router_state_change(struct net_bridge_port *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + +/* + * Add port to router_list + * list is maintained ordered by pointer value + * and locked by br->multicast_lock and RCU + */ +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + struct net_bridge_port *p; + struct hlist_node *slot = NULL; + + if (!hlist_unhashed(&port->rlist)) + return; + + hlist_for_each_entry(p, &br->router_list, rlist) { + if ((unsigned long) port >= (unsigned long) p) + break; + slot = &p->rlist; + } + + if (slot) + hlist_add_behind_rcu(&port->rlist, slot); + else + hlist_add_head_rcu(&port->rlist, &br->router_list); + br_rtr_notify(br->dev, port, RTM_NEWMDB); + br_port_mc_router_state_change(port, true); +} + +static void br_multicast_mark_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + unsigned long now = jiffies; + + if (!port) { + if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { + if 
(!timer_pending(&br->multicast_router_timer)) + br_mc_router_state_change(br, true); + mod_timer(&br->multicast_router_timer, + now + br->multicast_querier_interval); + } + return; + } + + if (port->multicast_router == MDB_RTR_TYPE_DISABLED || + port->multicast_router == MDB_RTR_TYPE_PERM) + return; + + br_multicast_add_router(br, port); + + mod_timer(&port->multicast_router_timer, + now + br->multicast_querier_interval); +} + +static void br_multicast_query_received(struct net_bridge *br, + struct net_bridge_port *port, + struct bridge_mcast_other_query *query, + struct br_ip *saddr, + unsigned long max_delay) +{ + if (!br_multicast_select_querier(br, port, saddr)) + return; + + br_multicast_update_query_timer(br, query, max_delay); + br_multicast_mark_router(br, port); +} + +static void br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + unsigned int transport_len = ip_transport_len(skb); + const struct iphdr *iph = ip_hdr(skb); + struct igmphdr *ih = igmp_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct igmpv3_query *ih3; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; + unsigned long max_delay; + unsigned long now = jiffies; + __be32 group; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + group = ih->group; + + if (transport_len == sizeof(*ih)) { + max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); + + if (!max_delay) { + max_delay = 10 * HZ; + group = 0; + } + } else if (transport_len >= sizeof(*ih3)) { + ih3 = igmpv3_query_hdr(skb); + if (ih3->nsrcs || + (br->multicast_igmp_version == 3 && group && ih3->suppress)) + goto out; + + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; + } else { + goto out; + } + + if (!group) { + saddr.proto = htons(ETH_P_IP); + saddr.src.ip4 = iph->saddr; + + br_multicast_query_received(br, port, &br->ip4_other_query, + &saddr, max_delay); + goto out; + } + + mp = br_mdb_ip4_get(br, group, vid); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + + if (mp->host_joined && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0 && + (br->multicast_igmp_version == 2 || + p->filter_mode == MCAST_EXCLUDE)) + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + unsigned int transport_len = ipv6_transport_len(skb); + struct mld_msg *mld; + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; + unsigned long max_delay; + unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); + const struct in6_addr *group = NULL; + bool is_general_query; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + if (transport_len == sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *) icmp6_hdr(skb); + max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); + if (max_delay) + group = &mld->mld_mca; + } else { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { + err = -EINVAL; + goto out; + } + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; + if (br->multicast_mld_version == 2 && + !ipv6_addr_any(&mld2q->mld2q_mca) && + mld2q->mld2q_suppress) + goto out; + + max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); + } + + is_general_query = group && ipv6_addr_any(group); + + if (is_general_query) { + saddr.proto = htons(ETH_P_IPV6); + saddr.src.ip6 = ipv6_hdr(skb)->saddr; + + br_multicast_query_received(br, port, &br->ip6_other_query, + &saddr, max_delay); + goto out; + } else if (!group) { + goto out; + } + + mp = br_mdb_ip6_get(br, group, vid); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + if (mp->host_joined && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0 && + (br->multicast_mld_version == 1 || + p->filter_mode == MCAST_EXCLUDE)) + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} +#endif + +static void +br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + struct bridge_mcast_other_query *other_query, + struct bridge_mcast_own_query *own_query, + const unsigned char *src) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + unsigned long now; + unsigned long time; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + mp = br_mdb_ip_get(br, group); + if (!mp) + goto out; + + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!br_port_group_equal(p, port, src)) + continue; + + if (p->flags & MDB_PG_FLAGS_PERMANENT) + break; + + p->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_del_pg(mp, p, pp); + } + goto out; + } + + if (timer_pending(&other_query->timer)) + goto out; + + if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { + __br_multicast_send_query(br, port, NULL, NULL, &mp->addr, + false, 0, NULL); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + + mod_timer(&own_query->timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (!br_port_group_equal(p, port, src)) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + + now = jiffies; + time = now + br->multicast_last_member_count * + br->multicast_last_member_interval; + + if (!port) { + if (mp->host_joined && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); + } + + goto out; + } + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->key.port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid, + const unsigned char *src) +{ + struct br_ip br_group; + struct bridge_mcast_own_query *own_query; + + if (ipv4_is_local_multicast(group)) + return; + + own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + + memset(&br_group, 0, sizeof(br_group)); + br_group.dst.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; + + br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + own_query, src); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid, + const unsigned char *src) +{ + struct br_ip br_group; + struct bridge_mcast_own_query *own_query; + + if (ipv6_addr_is_ll_all_nodes(group)) + return; + + own_query = port ? 
&port->ip6_own_query : &br->ip6_own_query; + + memset(&br_group, 0, sizeof(br_group)); + br_group.dst.ip6 = *group; + br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; + + br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + own_query, src); +} +#endif + +static void br_multicast_err_count(const struct net_bridge *br, + const struct net_bridge_port *p, + __be16 proto) +{ + struct bridge_mcast_stats __percpu *stats; + struct bridge_mcast_stats *pstats; + + if (!br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + pstats = this_cpu_ptr(stats); + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + pstats->mstats.igmp_parse_errors++; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + pstats->mstats.mld_parse_errors++; + break; +#endif + } + u64_stats_update_end(&pstats->syncp); +} + +static void br_multicast_pim(struct net_bridge *br, + struct net_bridge_port *port, + const struct sk_buff *skb) +{ + unsigned int offset = skb_transport_offset(skb); + struct pimhdr *pimhdr, _pimhdr; + + pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr); + if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION || + pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) + return; + + spin_lock(&br->multicast_lock); + br_multicast_mark_router(br, port); + spin_unlock(&br->multicast_lock); +} + +static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + if (ip_hdr(skb)->protocol != IPPROTO_IGMP || + igmp_hdr(skb)->type != IGMP_MRDISC_ADV) + return -ENOMSG; + + spin_lock(&br->multicast_lock); + br_multicast_mark_router(br, port); + spin_unlock(&br->multicast_lock); + + return 0; +} + +static int br_multicast_ipv4_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + const unsigned char *src; + struct igmphdr *ih; + int err; + + err = ip_mc_check_igmp(skb); + + if (err == -ENOMSG) { + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { + if (ip_hdr(skb)->protocol == IPPROTO_PIM) + br_multicast_pim(br, port, skb); + } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { + br_ip4_multicast_mrd_rcv(br, port, skb); + } + + return 0; + } else if (err < 0) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } + + ih = igmp_hdr(skb); + src = eth_hdr(skb)->h_source; + BR_INPUT_SKB_CB(skb)->igmp = ih->type; + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip4_multicast_add_group(br, port, ih->group, vid, src, + true); + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + err = br_ip4_multicast_igmp3_report(br, port, skb, vid); + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + br_ip4_multicast_query(br, port, skb, vid); + break; + case IGMP_HOST_LEAVE_MESSAGE: + br_ip4_multicast_leave_group(br, port, ih->group, vid, src); + break; + } + + br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) + return; + + spin_lock(&br->multicast_lock); + br_multicast_mark_router(br, port); + 
spin_unlock(&br->multicast_lock); +} + +static int br_multicast_ipv6_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + const unsigned char *src; + struct mld_msg *mld; + int err; + + err = ipv6_mc_check_mld(skb); + + if (err == -ENOMSG || err == -ENODATA) { + if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + if (err == -ENODATA && + ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) + br_ip6_multicast_mrd_rcv(br, port, skb); + + return 0; + } else if (err < 0) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type; + + switch (mld->mld_type) { + case ICMPV6_MGM_REPORT: + src = eth_hdr(skb)->h_source; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, + src, true); + break; + case ICMPV6_MLD2_REPORT: + err = br_ip6_multicast_mld2_report(br, port, skb, vid); + break; + case ICMPV6_MGM_QUERY: + err = br_ip6_multicast_query(br, port, skb, vid); + break; + case ICMPV6_MGM_REDUCTION: + src = eth_hdr(skb)->h_source; + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src); + break; + } + + br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + + return err; +} +#endif + +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb, u16 vid) +{ + int ret = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return 0; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = br_multicast_ipv4_rcv(br, port, skb, vid); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ret = br_multicast_ipv6_rcv(br, port, skb, vid); + break; +#endif + } + + return ret; +} + +static void br_multicast_query_expired(struct net_bridge *br, + struct bridge_mcast_own_query *query, + struct bridge_mcast_querier *querier) +{ + spin_lock(&br->multicast_lock); + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + RCU_INIT_POINTER(querier->port, NULL); + br_multicast_send_query(br, NULL, query); + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_query_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, ip4_own_query.timer); + + br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_query_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, ip6_own_query.timer); + + br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); +} +#endif + +static void br_multicast_gc_work(struct work_struct *work) +{ + struct net_bridge *br = container_of(work, struct net_bridge, + mcast_gc_work); + HLIST_HEAD(deleted_head); + + spin_lock_bh(&br->multicast_lock); + hlist_move_list(&br->mcast_gc_list, &deleted_head); + spin_unlock_bh(&br->multicast_lock); + + br_multicast_gc(&deleted_head); +} + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; + + br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + br->multicast_last_member_count = 2; + br->multicast_startup_query_count = 2; + + br->multicast_last_member_interval = HZ; + br->multicast_query_response_interval = 10 * HZ; + br->multicast_startup_query_interval = 125 * HZ / 4; + br->multicast_query_interval = 125 * HZ; + 
br->multicast_querier_interval = 255 * HZ; + br->multicast_membership_interval = 260 * HZ; + + br->ip4_other_query.delay_time = 0; + br->ip4_querier.port = NULL; + br->multicast_igmp_version = 2; +#if IS_ENABLED(CONFIG_IPV6) + br->multicast_mld_version = 1; + br->ip6_other_query.delay_time = 0; + br->ip6_querier.port = NULL; +#endif + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); + br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + + spin_lock_init(&br->multicast_lock); + timer_setup(&br->multicast_router_timer, + br_multicast_local_router_expired, 0); + timer_setup(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, 0); + timer_setup(&br->ip4_own_query.timer, + br_ip4_multicast_query_expired, 0); +#if IS_ENABLED(CONFIG_IPV6) + timer_setup(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, 0); + timer_setup(&br->ip6_own_query.timer, + br_ip6_multicast_query_expired, 0); +#endif + INIT_HLIST_HEAD(&br->mdb_list); + INIT_HLIST_HEAD(&br->mcast_gc_list); + INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work); +} + +static void br_ip4_multicast_join_snoopers(struct net_bridge *br) +{ + struct in_device *in_dev = in_dev_get(br->dev); + + if (!in_dev) + return; + + __ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); + in_dev_put(in_dev); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_join_snoopers(struct net_bridge *br) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); + ipv6_dev_mc_inc(br->dev, &addr); +} +#else +static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) +{ +} +#endif + +void br_multicast_join_snoopers(struct net_bridge *br) +{ + br_ip4_multicast_join_snoopers(br); + br_ip6_multicast_join_snoopers(br); +} + +static void br_ip4_multicast_leave_snoopers(struct net_bridge *br) +{ + struct in_device *in_dev = in_dev_get(br->dev); + + if (WARN_ON(!in_dev)) + return; + + __ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); + in_dev_put(in_dev); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_snoopers(struct net_bridge *br) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); + ipv6_dev_mc_dec(br->dev, &addr); +} +#else +static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) +{ +} +#endif + +void br_multicast_leave_snoopers(struct net_bridge *br) +{ + br_ip4_multicast_leave_snoopers(br); + br_ip6_multicast_leave_snoopers(br); +} + +static void __br_multicast_open(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + query->startup_sent = 0; + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return; + + mod_timer(&query->timer, jiffies); +} + +void br_multicast_open(struct net_bridge *br) +{ + __br_multicast_open(br, &br->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + __br_multicast_open(br, &br->ip6_own_query); +#endif +} + +void br_multicast_stop(struct net_bridge *br) +{ + del_timer_sync(&br->multicast_router_timer); + del_timer_sync(&br->ip4_other_query.timer); + del_timer_sync(&br->ip4_own_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&br->ip6_other_query.timer); + del_timer_sync(&br->ip6_own_query.timer); +#endif +} + +void br_multicast_dev_del(struct net_bridge *br) +{ + struct net_bridge_mdb_entry *mp; + HLIST_HEAD(deleted_head); + struct hlist_node *tmp; + + spin_lock_bh(&br->multicast_lock); + hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) + br_multicast_del_mdb_entry(mp); + hlist_move_list(&br->mcast_gc_list, &deleted_head); 
+ spin_unlock_bh(&br->multicast_lock); + + br_multicast_gc(&deleted_head); + cancel_work_sync(&br->mcast_gc_work); + + rcu_barrier(); +} + +int br_multicast_set_router(struct net_bridge *br, unsigned long val) +{ + int err = -EINVAL; + + spin_lock_bh(&br->multicast_lock); + + switch (val) { + case MDB_RTR_TYPE_DISABLED: + case MDB_RTR_TYPE_PERM: + br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); + del_timer(&br->multicast_router_timer); + br->multicast_router = val; + err = 0; + break; + case MDB_RTR_TYPE_TEMP_QUERY: + if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) + br_mc_router_state_change(br, false); + br->multicast_router = val; + err = 0; + break; + } + + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +static void __del_port_router(struct net_bridge_port *p) +{ + if (hlist_unhashed(&p->rlist)) + return; + hlist_del_init_rcu(&p->rlist); + br_rtr_notify(p->br->dev, p, RTM_DELMDB); + br_port_mc_router_state_change(p, false); + + /* don't allow timer refresh */ + if (p->multicast_router == MDB_RTR_TYPE_TEMP) + p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; +} + +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) +{ + struct net_bridge *br = p->br; + unsigned long now = jiffies; + int err = -EINVAL; + + spin_lock(&br->multicast_lock); + if (p->multicast_router == val) { + /* Refresh the temp router port timer */ + if (p->multicast_router == MDB_RTR_TYPE_TEMP) + mod_timer(&p->multicast_router_timer, + now + br->multicast_querier_interval); + err = 0; + goto unlock; + } + switch (val) { + case MDB_RTR_TYPE_DISABLED: + p->multicast_router = MDB_RTR_TYPE_DISABLED; + __del_port_router(p); + del_timer(&p->multicast_router_timer); + break; + case MDB_RTR_TYPE_TEMP_QUERY: + p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + __del_port_router(p); + break; + case MDB_RTR_TYPE_PERM: + p->multicast_router = MDB_RTR_TYPE_PERM; + del_timer(&p->multicast_router_timer); + br_multicast_add_router(br, p); + break; + case MDB_RTR_TYPE_TEMP: + p->multicast_router = MDB_RTR_TYPE_TEMP; + br_multicast_mark_router(br, p); + break; + default: + goto unlock; + } + err = 0; +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} + +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + struct net_bridge_port *port; + + __br_multicast_open(br, query); + + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + continue; + + if (query == &br->ip4_own_query) + br_multicast_enable(&port->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + else + br_multicast_enable(&port->ip6_own_query); +#endif + } + rcu_read_unlock(); +} + +int br_multicast_toggle(struct net_bridge *br, unsigned long val) +{ + struct net_bridge_port *port; + bool change_snoopers = false; + + spin_lock_bh(&br->multicast_lock); + if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) + goto unlock; + + br_mc_disabled_update(br->dev, val); + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { + change_snoopers = true; + goto unlock; + } + + if (!netif_running(br->dev)) + goto unlock; + + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port(port); + + change_snoopers = true; + +unlock: + spin_unlock_bh(&br->multicast_lock); + + /* br_multicast_join_snoopers has the potential to cause + * an MLD Report/Leave to be delivered to br_multicast_rcv, + * which 
would in turn call br_multicast_add_group, which would + * attempt to acquire multicast_lock. This function should be + * called after the lock has been released to avoid deadlocks on + * multicast_lock. + * + * br_multicast_leave_snoopers does not have the problem since + * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and + * returns without calling br_multicast_ipv4/6_rcv if it's not + * enabled. Moved both functions out just for symmetry. + */ + if (change_snoopers) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + else + br_multicast_leave_snoopers(br); + } + + return 0; +} + +bool br_multicast_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return !!br_opt_get(br, BROPT_MULTICAST_ENABLED); +} +EXPORT_SYMBOL_GPL(br_multicast_enabled); + +bool br_multicast_router(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + bool is_router; + + spin_lock_bh(&br->multicast_lock); + is_router = br_multicast_is_router(br); + spin_unlock_bh(&br->multicast_lock); + return is_router; +} +EXPORT_SYMBOL_GPL(br_multicast_router); + +int br_multicast_set_querier(struct net_bridge *br, unsigned long val) +{ + unsigned long max_delay; + + val = !!val; + + spin_lock_bh(&br->multicast_lock); + if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val) + goto unlock; + + br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val); + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + + if (!timer_pending(&br->ip4_other_query.timer)) + br->ip4_other_query.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip4_own_query); + +#if IS_ENABLED(CONFIG_IPV6) + if (!timer_pending(&br->ip6_other_query.timer)) + br->ip6_other_query.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip6_own_query); +#endif + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return 0; +} + +int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) +{ + /* Currently we support only version 2 and 3 */ + switch (val) { + case 2: + case 3: + break; + default: + return -EINVAL; + } + + spin_lock_bh(&br->multicast_lock); + br->multicast_igmp_version = val; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) +{ + /* Currently we support version 1 and 2 */ + switch (val) { + case 1: + case 2: + break; + default: + return -EINVAL; + } + + spin_lock_bh(&br->multicast_lock); + br->multicast_mld_version = val; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} +#endif + +/** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses + * @br_ip_list: The list to store found, snooped multicast IP addresses in + * + * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast + * snooping feature on all bridge ports of dev's bridge device, excluding + * the addresses from dev itself. + * + * Returns the number of items added to br_ip_list. 
+ * + * Notes: + * - br_ip_list needs to be initialized by caller + * - br_ip_list might contain duplicates in the end + * (needs to be taken care of by caller) + * - br_ip_list needs to be freed by caller + */ +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct net_bridge_port_group *group; + struct br_ip_list *entry; + int count = 0; + + rcu_read_lock(); + if (!br_ip_list || !netif_is_bridge_port(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + list_for_each_entry_rcu(port, &br->port_list, list) { + if (!port->dev || port->dev == dev) + continue; + + hlist_for_each_entry_rcu(group, &port->mglist, mglist) { + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto unlock; + + entry->addr = group->key.addr; + list_add(&entry->list, br_ip_list); + count++; + } + } + +unlock: + rcu_read_unlock(); + return count; +} +EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); + +/** + * br_multicast_has_querier_anywhere - Checks for a querier on a bridge + * @dev: The bridge port providing the bridge on which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a valid querier exists anywhere on the bridged link layer. + * Otherwise returns false. + */ +bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct ethhdr eth; + bool ret = false; + + rcu_read_lock(); + if (!netif_is_bridge_port(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + memset(ð, 0, sizeof(eth)); + eth.h_proto = htons(proto); + + ret = br_multicast_querier_exists(br, ð); + +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); + +/** + * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port + * @dev: The bridge port adjacent to which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a selected querier is behind one of the other ports of this + * bridge. Otherwise returns false. 
+ */ +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + bool ret = false; + + rcu_read_lock(); + if (!netif_is_bridge_port(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + switch (proto) { + case ETH_P_IP: + if (!timer_pending(&br->ip4_other_query.timer) || + rcu_dereference(br->ip4_querier.port) == port) + goto unlock; + break; +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: + if (!timer_pending(&br->ip6_other_query.timer) || + rcu_dereference(br->ip6_querier.port) == port) + goto unlock; + break; +#endif + default: + goto unlock; + } + + ret = true; +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); + +static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, + const struct sk_buff *skb, u8 type, u8 dir) +{ + struct bridge_mcast_stats *pstats = this_cpu_ptr(stats); + __be16 proto = skb->protocol; + unsigned int t_len; + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + switch (type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v1reports[dir]++; + break; + case IGMPV2_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v2reports[dir]++; + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v3reports[dir]++; + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + if (t_len != sizeof(struct igmphdr)) { + pstats->mstats.igmp_v3queries[dir]++; + } else { + unsigned int offset = skb_transport_offset(skb); + struct igmphdr *ih, _ihdr; + + ih = skb_header_pointer(skb, offset, + sizeof(_ihdr), &_ihdr); + if (!ih) + break; + if (!ih->code) + pstats->mstats.igmp_v1queries[dir]++; + else + pstats->mstats.igmp_v2queries[dir]++; + } + break; + case IGMP_HOST_LEAVE_MESSAGE: + pstats->mstats.igmp_leaves[dir]++; + break; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + t_len = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); + t_len -= skb_network_header_len(skb); + switch (type) { + case ICMPV6_MGM_REPORT: + pstats->mstats.mld_v1reports[dir]++; + break; + case ICMPV6_MLD2_REPORT: + pstats->mstats.mld_v2reports[dir]++; + break; + case ICMPV6_MGM_QUERY: + if (t_len != sizeof(struct mld_msg)) + pstats->mstats.mld_v2queries[dir]++; + else + pstats->mstats.mld_v1queries[dir]++; + break; + case ICMPV6_MGM_REDUCTION: + pstats->mstats.mld_leaves[dir]++; + break; + } + break; +#endif /* CONFIG_IPV6 */ + } + u64_stats_update_end(&pstats->syncp); +} + +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + const struct sk_buff *skb, u8 type, u8 dir) +{ + struct bridge_mcast_stats __percpu *stats; + + /* if multicast_disabled is true then igmp type can't be set */ + if (!type || !br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + br_mcast_stats_add(stats, skb, type, dir); +} + +int br_multicast_init_stats(struct net_bridge *br) +{ + br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!br->mcast_stats) + return -ENOMEM; + + return 0; +} + +void br_multicast_uninit_stats(struct net_bridge *br) +{ + free_percpu(br->mcast_stats); +} + +/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */ +static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src) +{ + dst[BR_MCAST_DIR_RX] += 
src[BR_MCAST_DIR_RX]; + dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX]; +} + +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest) +{ + struct bridge_mcast_stats __percpu *stats; + struct br_mcast_stats tdst; + int i; + + memset(dest, 0, sizeof(*dest)); + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + memset(&tdst, 0, sizeof(tdst)); + for_each_possible_cpu(i) { + struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i); + struct br_mcast_stats temp; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); + mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); + mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries); + mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves); + mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports); + mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports); + mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports); + tdst.igmp_parse_errors += temp.igmp_parse_errors; + + mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries); + mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries); + mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves); + mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports); + mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports); + tdst.mld_parse_errors += temp.mld_parse_errors; + } + memcpy(dest, &tdst, sizeof(*dest)); +} + +int br_mdb_hash_init(struct net_bridge *br) +{ + int err; + + err = rhashtable_init(&br->sg_port_tbl, &br_sg_port_rht_params); + if (err) + return err; + + err = rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params); + if (err) { + rhashtable_destroy(&br->sg_port_tbl); + return err; + } + + return 0; +} + +void br_mdb_hash_fini(struct net_bridge *br) +{ + rhashtable_destroy(&br->sg_port_tbl); + rhashtable_destroy(&br->mdb_hash_tbl); +} diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c new file mode 100644 index 000000000..f14beb9a6 --- /dev/null +++ b/net/bridge/br_netfilter_hooks.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> + * + * Lennert dedicates this file to Kerstin Wurdinger. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> +#include <linux/netfilter_bridge.h> +#include <uapi/linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_arp.h> +#include <linux/in_route.h> +#include <linux/rculist.h> +#include <linux/inetdevice.h> + +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/addrconf.h> +#include <net/route.h> +#include <net/netfilter/br_netfilter.h> +#include <net/netns/generic.h> + +#include <linux/uaccess.h> +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +static unsigned int brnf_net_id __read_mostly; + +struct brnf_net { + bool enabled; + +#ifdef CONFIG_SYSCTL + struct ctl_table_header *ctl_hdr; +#endif + + /* default value is 1 */ + int call_iptables; + int call_ip6tables; + int call_arptables; + + /* default value is 0 */ + int filter_vlan_tagged; + int filter_pppoe_tagged; + int pass_vlan_indev; +}; + +#define IS_IP(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + +#define IS_IPV6(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + +#define IS_ARP(skb) \ + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + +static inline __be16 vlan_proto(const struct sk_buff *skb) +{ + if (skb_vlan_tag_present(skb)) + return skb->protocol; + else if (skb->protocol == htons(ETH_P_8021Q)) + return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; + else + return 0; +} + +static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; +} + +static inline bool is_vlan_ipv6(const struct sk_buff *skb, + const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return vlan_proto(skb) == htons(ETH_P_IPV6) && + brnet->filter_vlan_tagged; +} + +static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; +} + +static inline __be16 pppoe_proto(const struct sk_buff *skb) +{ + return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); +} + +static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return skb->protocol == htons(ETH_P_PPP_SES) && + pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; +} + +static inline bool is_pppoe_ipv6(const struct sk_buff *skb, + const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return skb->protocol == htons(ETH_P_PPP_SES) && + pppoe_proto(skb) == htons(PPP_IPV6) && + brnet->filter_pppoe_tagged; +} + +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; + u16 vlan_tci; + __be16 vlan_proto; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); + +static void nf_bridge_info_free(struct sk_buff *skb) +{ + skb_ext_del(skb, 
SKB_EXT_BRIDGE_NF); +} + +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? port->br->dev : NULL; +} + +static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) +{ + return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); +} + +unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __cpu_to_be16(ETH_P_8021Q): + return VLAN_HLEN; + case __cpu_to_be16(ETH_P_PPP_SES): + return PPPOE_SES_HLEN; + default: + return 0; + } +} + +static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull_rcsum(skb, len); + skb->network_header += len; +} + +/* When handing a packet over to the IP layer + * check whether we have a skb that is in the + * expected format + */ + +static int br_validate_ipv4(struct net *net, struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + + iph = ip_hdr(skb); + + /* Basic sanity checks */ + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + if (!pskb_may_pull(skb, iph->ihl*4)) + goto inhdr_error; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto csum_error; + + len = ntohs(iph->tot_len); + if (skb->len < len) { + __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } else if (len < (iph->ihl*4)) + goto inhdr_error; + + if (pskb_trim_rcsum(skb, len)) { + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); + goto drop; + } + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ + return 0; + +csum_error: + __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); +inhdr_error: + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +void nf_bridge_update_protocol(struct sk_buff *skb) +{ + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + switch (nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: + skb->protocol = htons(ETH_P_8021Q); + break; + case BRNF_PROTO_PPPOE: + skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } +} + +/* Obtain the correct destination MAC address, while preserving the original + * source MAC address. If we already know this address, we just copy it. If we + * don't, we use the neighbour framework to find out. In both cases, we make + * sure that br_handle_frame_finish() is called afterwards. 
+ */ +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct neighbour *neigh; + struct dst_entry *dst; + + skb->dev = bridge_parent(skb->dev); + if (!skb->dev) + goto free_skb; + dst = skb_dst(skb); + neigh = dst_neigh_lookup_skb(dst, skb); + if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + int ret; + + if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); + skb->dev = nf_bridge->physindev; + ret = br_handle_frame_finish(net, sk, skb); + } else { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. + */ + skb_copy_from_linear_data_offset(skb, + -(ETH_HLEN-ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->bridged_dnat = 1; + /* FIXME Need to refragment */ + ret = neigh->output(neigh, skb); + } + neigh_release(neigh); + return ret; + } +free_skb: + kfree_skb(skb); + return 0; +} + +static inline bool +br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) +{ + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; +} + +/* This requires some explaining. If DNAT has taken place, + * we will need to fix up the destination Ethernet address. + * This is also true when SNAT takes place (for the reply direction). + * + * There are two cases to consider: + * 1. The packet was DNAT'ed to a device in the same bridge + * port group as it was received on. We can still bridge + * the packet. + * 2. The packet was DNAT'ed to a different device, either + * a non-bridged device or another bridge port group. + * The packet will need to be routed. + * + * The correct way of distinguishing between these two cases is to + * call ip_route_input() and to look at skb->dst->dev, which is + * changed to the destination device if ip_route_input() succeeds. + * + * Let's first consider the case that ip_route_input() succeeds: + * + * If the output device equals the logical bridge device the packet + * came in on, we can consider this bridging. The corresponding MAC + * address will be obtained in br_nf_pre_routing_finish_bridge. + * Otherwise, the packet is considered to be routed and we just + * change the destination MAC address so that the packet will + * later be passed up to the IP stack to be routed. For a redirected + * packet, ip_route_input() will give back the localhost as output device, + * which differs from the bridge device. + * + * Let's now consider the case that ip_route_input() fails: + * + * This can be because the destination address is martian, in which case + * the packet will be dropped. + * If IP forwarding is disabled, ip_route_input() will fail, while + * ip_route_output_key() can return success. The source + * address for ip_route_output_key() is set to zero, so ip_route_output_key() + * thinks we're handling a locally generated packet and won't care + * if IP forwarding is enabled. If the output device equals the logical bridge + * device, we proceed as if ip_route_input() succeeded. If it differs from the + * logical bridge port or if ip_route_output_key() fails we drop the packet. 
+ */ +static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct iphdr *iph = ip_hdr(skb); + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + int err; + + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->in_prerouting = 0; + if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + /* If err equals -EHOSTUNREACH the error is due to a + * martian destination or due to the fact that + * forwarding is disabled. For most martian packets, + * ip_route_output_key() will fail. It won't fail for 2 types of + * martian destinations: loopback destinations and destination + * 0.0.0.0. In both cases the packet will be dropped because the + * destination is the loopback device and not the bridge. */ + if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + goto free_skb; + + rt = ip_route_output(net, iph->daddr, 0, + RT_TOS(iph->tos), 0); + if (!IS_ERR(rt)) { + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. */ + if (rt->dst.dev == dev) { + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + goto bridged_dnat; + } + ip_rt_put(rt); + } +free_skb: + kfree_skb(skb); + return 0; + } else { + if (skb_dst(skb)->dev == dev) { +bridged_dnat: + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, + NULL, + br_nf_pre_routing_finish_bridge); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, + br_handle_frame_finish); + return 0; +} + +static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, + const struct net_device *dev, + const struct net *net) +{ + struct net_device *vlan, *br; + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + br = bridge_parent(dev); + + if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) + return br; + + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, + skb_vlan_tag_get(skb) & VLAN_VID_MASK); + + return vlan ? vlan : br; +} + +/* Some common code for IPv4/IPv6 */ +struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + nf_bridge->in_prerouting = 1; + nf_bridge->physindev = skb->dev; + skb->dev = brnf_get_logical_dev(skb, skb->dev, net); + + if (skb->protocol == htons(ETH_P_8021Q)) + nf_bridge->orig_proto = BRNF_PROTO_8021Q; + else if (skb->protocol == htons(ETH_P_PPP_SES)) + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; + + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return skb->dev; +} + +/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. + * Replicate the checks that IPv4 does on packet reception. 
+ * Set skb->dev to the bridge device (i.e. parent of the + * receiving device) to make netfilter happy, the REDIRECT + * target in particular. Save the original destination IP + * address to be able to detect DNAT afterwards. */ +static unsigned int br_nf_pre_routing(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + struct net_bridge_port *p; + struct net_bridge *br; + __u32 len = nf_bridge_encap_header_len(skb); + struct brnf_net *brnet; + + if (unlikely(!pskb_may_pull(skb, len))) + return NF_DROP; + + p = br_port_get_rcu(state->in); + if (p == NULL) + return NF_DROP; + br = p->br; + + brnet = net_generic(state->net, brnf_net_id); + if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) { + if (!brnet->call_ip6tables && + !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) + return NF_ACCEPT; + if (!ipv6_mod_enabled()) { + pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); + return NF_DROP; + } + + nf_bridge_pull_encap_header_rcsum(skb); + return br_nf_pre_routing_ipv6(priv, skb, state); + } + + if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) + return NF_ACCEPT; + + if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && + !is_pppoe_ip(skb, state->net)) + return NF_ACCEPT; + + nf_bridge_pull_encap_header_rcsum(skb); + + if (br_validate_ipv4(state->net, skb)) + return NF_DROP; + + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb, state->net)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; + + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish); + + return NF_STOLEN; +} + + +/* PF_BRIDGE/FORWARD *************************************************/ +static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *in; + + if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { + + if (skb->protocol == htons(ETH_P_IP)) + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; + + if (skb->protocol == htons(ETH_P_IPV6)) + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + in = nf_bridge->physindev; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge_update_protocol(skb); + } else { + in = *((struct net_device **)(skb->cb)); + } + nf_bridge_push_encap_header(skb); + + br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, + br_forward_finish); + return 0; +} + + +/* This is the 'purely bridged' case. For IP, we pass the packet to + * netfilter with indev and outdev set to the bridge device, + * but we are still able to filter on the 'real' indev/outdev + * because of the physdev module. For ARP, indev and outdev are the + * bridge ports. */ +static unsigned int br_nf_forward_ip(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + struct net_device *parent; + u_int8_t pf; + + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_ACCEPT; + + /* Need exclusive nf_bridge_info since we might have multiple + * different physoutdevs. 
*/ + if (!nf_bridge_unshare(skb)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + + parent = bridge_parent(state->out); + if (!parent) + return NF_DROP; + + if (IS_IP(skb) || is_vlan_ip(skb, state->net) || + is_pppoe_ip(skb, state->net)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) + pf = NFPROTO_IPV6; + else + return NF_ACCEPT; + + nf_bridge_pull_encap_header(skb); + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + if (pf == NFPROTO_IPV4) { + if (br_validate_ipv4(state->net, skb)) + return NF_DROP; + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + + if (pf == NFPROTO_IPV6) { + if (br_validate_ipv6(state->net, skb)) + return NF_DROP; + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + + nf_bridge->physoutdev = skb->dev; + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, + brnf_get_logical_dev(skb, state->in, state->net), + parent, br_nf_forward_finish); + + return NF_STOLEN; +} + +static unsigned int br_nf_forward_arp(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct net_bridge_port *p; + struct net_bridge *br; + struct net_device **d = (struct net_device **)(skb->cb); + struct brnf_net *brnet; + + p = br_port_get_rcu(state->out); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + brnet = net_generic(state->net, brnf_net_id); + if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) + return NF_ACCEPT; + + if (!IS_ARP(skb)) { + if (!is_vlan_arp(skb, state->net)) + return NF_ACCEPT; + nf_bridge_pull_encap_header(skb); + } + + if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) + return NF_DROP; + + if (arp_hdr(skb)->ar_pln != 4) { + if (is_vlan_arp(skb, state->net)) + nf_bridge_push_encap_header(skb); + return NF_ACCEPT; + } + *d = state->in; + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, + state->in, state->out, br_nf_forward_finish); + + return NF_STOLEN; +} + +static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct brnf_frag_data *data; + int err; + + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); + + if (err) { + kfree_skb(skb); + return 0; + } + + if (data->vlan_proto) + __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); + + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(net, sk, skb); +} + +static int +br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) +{ + unsigned int mtu = ip_skb_dst_mtu(sk, skb); + struct iphdr *iph = ip_hdr(skb); + + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > mtu))) { + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + + return ip_do_fragment(net, sk, skb, output); +} + +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + return PPPOE_SES_HLEN; + return 0; +} + +static int br_nf_dev_queue_xmit(struct net *net, 
struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; + + mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(net, sk, skb); + } + + /* This is wrong! We should preserve the original fragment + * boundaries by preserving frag_list rather than refragmenting. + */ + if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && + skb->protocol == htons(ETH_P_IP)) { + struct brnf_frag_data *data; + + if (br_validate_ipv4(net, skb)) + goto drop; + + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; + + data = this_cpu_ptr(&brnf_frag_data_storage); + + if (skb_vlan_tag_present(skb)) { + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + } else { + data->vlan_proto = 0; + } + + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); + } + if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && + skb->protocol == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + struct brnf_frag_data *data; + + if (br_validate_ipv6(net, skb)) + goto drop; + + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + if (v6ops) + return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); + + kfree_skb(skb); + return -EMSGSIZE; + } + nf_bridge_info_free(skb); + return br_dev_queue_push_xmit(net, sk, skb); + drop: + kfree_skb(skb); + return 0; +} + +/* PF_BRIDGE/POST_ROUTING ********************************************/ +static unsigned int br_nf_post_routing(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *realoutdev = bridge_parent(skb->dev); + u_int8_t pf; + + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in + * on a bridge, but was delivered locally and is now being routed: + * + * POST_ROUTING was already invoked from the ip stack. 
+ */ + if (!nf_bridge || !nf_bridge->physoutdev) + return NF_ACCEPT; + + if (!realoutdev) + return NF_DROP; + + if (IS_IP(skb) || is_vlan_ip(skb, state->net) || + is_pppoe_ip(skb, state->net)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) + pf = NFPROTO_IPV6; + else + return NF_ACCEPT; + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->pkt_otherhost = true; + } + + nf_bridge_pull_encap_header(skb); + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, + NULL, realoutdev, + br_nf_dev_queue_xmit); + + return NF_STOLEN; +} + +/* IP/SABOTAGE *****************************************************/ +/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING + * for the second time. */ +static unsigned int ip_sabotage_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge) { + if (nf_bridge->sabotage_in_done) + return NF_ACCEPT; + + if (!nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev) && + !netif_is_l3_slave(skb->dev)) { + nf_bridge->sabotage_in_done = 1; + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } + } + + return NF_ACCEPT; +} + +/* This is called when br_netfilter has called into iptables/netfilter, + * and DNAT has taken place on a bridge-forwarded packet. + * + * neigh->output has created a new MAC header, with local br0 MAC + * as saddr. + * + * This restores the original MAC saddr of the bridged packet + * before invoking bridge forward logic to transmit the packet. + */ +static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + skb_pull(skb, ETH_HLEN); + nf_bridge->bridged_dnat = 0; + + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); + skb->dev = nf_bridge->physindev; + + nf_bridge->physoutdev = NULL; + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); +} + +static int br_nf_dev_xmit(struct sk_buff *skb) +{ + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge && nf_bridge->bridged_dnat) { + br_nf_pre_routing_finish_bridge_slow(skb); + return 1; + } + return 0; +} + +static const struct nf_br_ops br_ops = { + .br_dev_xmit_hook = br_nf_dev_xmit, +}; + +/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * br_dev_queue_push_xmit is called afterwards */ +static const struct nf_hook_ops br_nf_ops[] = { + { + .hook = br_nf_pre_routing, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_forward_ip, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF - 1, + }, + { + .hook = br_nf_forward_arp, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_post_routing, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_LAST, + }, + { + .hook = ip_sabotage_in, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_FIRST, + }, + { + .hook = ip_sabotage_in, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST, + }, +}; + +static int 
brnf_device_event(struct notifier_block *unused, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct brnf_net *brnet; + struct net *net; + int ret; + + if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE)) + return NOTIFY_DONE; + + ASSERT_RTNL(); + + net = dev_net(dev); + brnet = net_generic(net, brnf_net_id); + if (brnet->enabled) + return NOTIFY_OK; + + ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret) + return NOTIFY_BAD; + + brnet->enabled = true; + return NOTIFY_OK; +} + +static struct notifier_block brnf_notifier __read_mostly = { + .notifier_call = brnf_device_event, +}; + +/* recursively invokes nf_hook_slow (again), skipping already-called + * hooks (< NF_BR_PRI_BRNF). + * + * Called with rcu read lock held. + */ +int br_nf_hook_thresh(unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)) +{ + const struct nf_hook_entries *e; + struct nf_hook_state state; + struct nf_hook_ops **ops; + unsigned int i; + int ret; + + e = rcu_dereference(net->nf.hooks_bridge[hook]); + if (!e) + return okfn(net, sk, skb); + + ops = nf_hook_entries_get_hook_ops(e); + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } + + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, + sk, net, okfn); + + ret = nf_hook_slow(skb, &state, e, i); + if (ret == 1) + ret = okfn(net, sk, skb); + + return ret; +} + +#ifdef CONFIG_SYSCTL +static +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write && *(int *)(ctl->data)) + *(int *)(ctl->data) = 1; + return ret; +} + +static struct ctl_table brnf_table[] = { + { + .procname = "bridge-nf-call-arptables", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-call-iptables", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-call-ip6tables", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-filter-vlan-tagged", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-filter-pppoe-tagged", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { + .procname = "bridge-nf-pass-vlan-input-dev", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, + { } +}; + +static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) +{ + brnf->call_iptables = 1; + brnf->call_ip6tables = 1; + brnf->call_arptables = 1; + brnf->filter_vlan_tagged = 0; + brnf->filter_pppoe_tagged = 0; + brnf->pass_vlan_indev = 0; +} + +static int br_netfilter_sysctl_init_net(struct net *net) +{ + struct ctl_table *table = brnf_table; + struct brnf_net *brnet; + + if (!net_eq(net, &init_net)) { 
+ table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + } + + brnet = net_generic(net, brnf_net_id); + table[0].data = &brnet->call_arptables; + table[1].data = &brnet->call_iptables; + table[2].data = &brnet->call_ip6tables; + table[3].data = &brnet->filter_vlan_tagged; + table[4].data = &brnet->filter_pppoe_tagged; + table[5].data = &brnet->pass_vlan_indev; + + br_netfilter_sysctl_default(brnet); + + brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table); + if (!brnet->ctl_hdr) { + if (!net_eq(net, &init_net)) + kfree(table); + + return -ENOMEM; + } + + return 0; +} + +static void br_netfilter_sysctl_exit_net(struct net *net, + struct brnf_net *brnet) +{ + struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; + + unregister_net_sysctl_table(brnet->ctl_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static int __net_init brnf_init_net(struct net *net) +{ + return br_netfilter_sysctl_init_net(net); +} +#endif + +static void __net_exit brnf_exit_net(struct net *net) +{ + struct brnf_net *brnet; + + brnet = net_generic(net, brnf_net_id); + if (brnet->enabled) { + nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); + brnet->enabled = false; + } + +#ifdef CONFIG_SYSCTL + br_netfilter_sysctl_exit_net(net, brnet); +#endif +} + +static struct pernet_operations brnf_net_ops __read_mostly = { +#ifdef CONFIG_SYSCTL + .init = brnf_init_net, +#endif + .exit = brnf_exit_net, + .id = &brnf_net_id, + .size = sizeof(struct brnf_net), +}; + +static int __init br_netfilter_init(void) +{ + int ret; + + ret = register_pernet_subsys(&brnf_net_ops); + if (ret < 0) + return ret; + + ret = register_netdevice_notifier(&brnf_notifier); + if (ret < 0) { + unregister_pernet_subsys(&brnf_net_ops); + return ret; + } + + RCU_INIT_POINTER(nf_br_ops, &br_ops); + printk(KERN_NOTICE "Bridge firewalling registered\n"); + return 0; +} + +static void __exit br_netfilter_fini(void) +{ + RCU_INIT_POINTER(nf_br_ops, NULL); + unregister_netdevice_notifier(&brnf_notifier); + unregister_pernet_subsys(&brnf_net_ops); +} + +module_init(br_netfilter_init); +module_exit(br_netfilter_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); +MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); +MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c new file mode 100644 index 000000000..6b07f3067 --- /dev/null +++ b/net/bridge/br_netfilter_ipv6.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> + * + * Lennert dedicates this file to Kerstin Wurdinger. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_arp.h> +#include <linux/in_route.h> +#include <linux/inetdevice.h> + +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/addrconf.h> +#include <net/route.h> +#include <net/netfilter/br_netfilter.h> + +#include <linux/uaccess.h> +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +/* We only check the length. A bridge shouldn't do any hop-by-hop stuff + * anyway + */ +static int br_nf_check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + u32 pkt_len; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = nh[off + 1] + 2; + + switch (nh[off]) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (nh[off + 1] != 4 || (off & 3) != 2) + goto bad; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + goto bad; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pskb_trim_rcsum(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + nh = skb_network_header(skb); + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; +} + +int br_validate_ipv6(struct net *net, struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct inet6_dev *idev = __in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + hdr = ipv6_hdr(skb); + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +static inline bool +br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) +{ + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; +} + +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. 
+ */ +static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + nf_bridge->in_prerouting = 0; + if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); + + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, + skb->dev, NULL, br_handle_frame_finish); + + return 0; +} + +/* Replicate the checks that IPv6 does on packet reception and pass the packet + * to ip6tables. + */ +unsigned int br_nf_pre_routing_ipv6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_bridge_info *nf_bridge; + + if (br_validate_ipv6(state->net, skb)) + return NF_DROP; + + nf_bridge = nf_bridge_alloc(skb); + if (!nf_bridge) + return NF_DROP; + if (!setup_pre_routing(skb, state->net)) + return NF_DROP; + + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + + skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, + skb->dev, NULL, + br_nf_pre_routing_finish_ipv6); + + return NF_STOLEN; +} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c new file mode 100644 index 000000000..31b00ba5d --- /dev/null +++ b/net/bridge/br_netlink.c @@ -0,0 +1,1744 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Bridge netlink control interface + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <uapi/linux/if_bridge.h> + +#include "br_private.h" +#include "br_private_stp.h" +#include "br_private_tunnel.h" + +static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) +{ + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; + int num_vlans = 0; + + if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; + + pvid = br_get_pvid(vg); + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + flags = 0; + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((v->vid - vid_range_end) 
== 1 && + flags == vid_range_flags) { + vid_range_end = v->vid; + continue; + } else { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } +initvars: + vid_range_start = v->vid; + vid_range_end = v->vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + if ((vid_range_end - vid_range_start) > 0) + num_vlans += 2; + else + num_vlans += 1; + } + + return num_vlans; +} + +static int br_get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) +{ + int num_vlans; + + if (!vg) + return 0; + + if (filter_mask & RTEXT_FILTER_BRVLAN) + return vg->num_vlans; + + rcu_read_lock(); + num_vlans = __get_num_vlan_infos(vg, filter_mask); + rcu_read_unlock(); + + return num_vlans; +} + +static size_t br_get_link_af_size_filtered(const struct net_device *dev, + u32 filter_mask) +{ + struct net_bridge_vlan_group *vg = NULL; + struct net_bridge_port *p = NULL; + struct net_bridge *br; + int num_vlan_infos; + size_t vinfo_sz = 0; + + rcu_read_lock(); + if (netif_is_bridge_port(dev)) { + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + } else if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group_rcu(br); + } + num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask); + rcu_read_unlock(); + + if (p && (p->flags & BR_VLAN_TUNNEL)) + vinfo_sz += br_get_vlan_tunnel_info_size(vg); + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + + return vinfo_sz; +} + +static inline size_t br_port_info_size(void) +{ + return nla_total_size(1) /* IFLA_BRPORT_STATE */ + + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + + nla_total_size(4) /* IFLA_BRPORT_COST */ + + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_BCAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_COST */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_ID */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ + + nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ +#endif + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */ + + 0; +} + +static inline size_t 
br_nlmsg_size(struct net_device *dev, u32 filter_mask) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */ + + nla_total_size(br_get_link_af_size_filtered(dev, + filter_mask)) /* IFLA_AF_SPEC */ + + nla_total_size(4); /* IFLA_BRPORT_BACKUP_PORT */ +} + +static int br_port_fill_attrs(struct sk_buff *skb, + const struct net_bridge_port *p) +{ + u8 mode = !!(p->flags & BR_HAIRPIN_MODE); + struct net_bridge_port *backup_p; + u64 timerval; + + if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || + nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || + nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || + nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, + !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, + !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_TO_UCAST, + !!(p->flags & BR_MULTICAST_TO_UNICAST)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, + !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD, + !!(p->flags & BR_MCAST_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_BCAST_FLOOD, + !!(p->flags & BR_BCAST_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, + !!(p->flags & BR_PROXYARP_WIFI)) || + nla_put(skb, IFLA_BRPORT_ROOT_ID, sizeof(struct ifla_bridge_id), + &p->designated_root) || + nla_put(skb, IFLA_BRPORT_BRIDGE_ID, sizeof(struct ifla_bridge_id), + &p->designated_bridge) || + nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_PORT, p->designated_port) || + nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_COST, p->designated_cost) || + nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id) || + nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) || + nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + p->topology_change_ack) || + nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || + nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & + BR_VLAN_TUNNEL)) || + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) || + nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS, + !!(p->flags & BR_NEIGH_SUPPRESS)) || + nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags & + BR_MRP_LOST_CONT)) || + nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN, + !!(p->flags & BR_MRP_LOST_IN_CONT)) || + nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED))) + return -EMSGSIZE; + + timerval = br_timer_value(&p->message_age_timer); + if (nla_put_u64_64bit(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval, + IFLA_BRPORT_PAD)) + return -EMSGSIZE; + timerval = br_timer_value(&p->forward_delay_timer); + if (nla_put_u64_64bit(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval, + IFLA_BRPORT_PAD)) + return -EMSGSIZE; + timerval = br_timer_value(&p->hold_timer); + if (nla_put_u64_64bit(skb, IFLA_BRPORT_HOLD_TIMER, timerval, + IFLA_BRPORT_PAD)) + return -EMSGSIZE; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, + p->multicast_router)) + return -EMSGSIZE; +#endif + + /* we might be called only with br->lock */ + rcu_read_lock(); + backup_p = rcu_dereference(p->backup_port); + if 
(backup_p) + nla_put_u32(skb, IFLA_BRPORT_BACKUP_PORT, + backup_p->dev->ifindex); + rcu_read_unlock(); + + return 0; +} + +static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start, + u16 vid_end, u16 flags) +{ + struct bridge_vlan_info vinfo; + + if ((vid_end - vid_start) > 0) { + /* add range to skb */ + vinfo.vid = vid_start; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + + vinfo.vid = vid_end; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } else { + vinfo.vid = vid_start; + vinfo.flags = flags; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, + struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; + int err = 0; + + /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan + * and mark vlan info with begin and end flags + * if vlaninfo represents a range + */ + pvid = br_get_pvid(vg); + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + flags = 0; + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((v->vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = v->vid; + continue; + } else { + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + +initvars: + vid_range_start = v->vid; + vid_range_end = v->vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + + return 0; +} + +static int br_fill_ifvlaninfo(struct sk_buff *skb, + struct net_bridge_vlan_group *vg) +{ + struct bridge_vlan_info vinfo; + struct net_bridge_vlan *v; + u16 pvid; + + pvid = br_get_pvid(vg); + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + + vinfo.vid = v->vid; + vinfo.flags = 0; + if (v->vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +/* + * Create one netlink message for one interface + * Contains port and master info as well as carrier and bridge state. + */ +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) +{ + u8 operstate = netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN; + struct nlattr *af = NULL; + struct net_bridge *br; + struct ifinfomsg *hdr; + struct nlmsghdr *nlh; + + if (port) + br = port->br; + else + br = netdev_priv(dev); + + br_debug(br, "br_fill_info event %d port %s master %s\n", + event, dev->name, br->dev->name); + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_BRIDGE; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MASTER, br->dev->ifindex) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) + goto nla_put_failure; + + if (event == RTM_NEWLINK && port) { + struct nlattr *nest; + + nest = nla_nest_start(skb, IFLA_PROTINFO); + if (nest == NULL || br_port_fill_attrs(skb, port) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + + if (filter_mask & (RTEXT_FILTER_BRVLAN | + RTEXT_FILTER_BRVLAN_COMPRESSED | + RTEXT_FILTER_MRP)) { + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + } + + /* Check if the VID information is requested */ + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { + struct net_bridge_vlan_group *vg; + int err; + + /* RCU needed because of the VLAN locking rules (rcu || rtnl) */ + rcu_read_lock(); + if (port) + vg = nbp_vlan_group_rcu(port); + else + vg = br_vlan_group_rcu(br); + + if (!vg || !vg->num_vlans) { + rcu_read_unlock(); + goto done; + } + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + err = br_fill_ifvlaninfo_compressed(skb, vg); + else + err = br_fill_ifvlaninfo(skb, vg); + + if (port && (port->flags & BR_VLAN_TUNNEL)) + err = br_fill_vlan_tunnel_info(skb, vg); + rcu_read_unlock(); + if (err) + goto nla_put_failure; + } + + if (filter_mask & RTEXT_FILTER_MRP) { + int err; + + if (!br_mrp_enabled(br) || port) + goto done; + + rcu_read_lock(); + err = br_mrp_fill_info(skb, br); + rcu_read_unlock(); + + if (err) + goto nla_put_failure; + } + +done: + if (af) + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Notify listeners of a change in bridge or port information */ +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port) +{ + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + struct net_device *dev; + struct sk_buff *skb; + int err = -ENOBUFS; + struct net *net; + u16 port_no = 0; + + if (WARN_ON(!port && !br)) + return; + + if (port) { + dev = port->dev; + br = port->br; + port_no = port->port_no; + } else { + dev = br->dev; + } + + net = dev_net(dev); + br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event); + + skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev); + if (err < 0) { + /* -EMSGSIZE implies BUG in br_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_LINK, err); +} + +/* + * Dump information about all ports, in 
response to GETLINK + */ +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, int nlflags) +{ + struct net_bridge_port *port = br_port_get_rtnl(dev); + + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) && + !(filter_mask & RTEXT_FILTER_MRP)) + return 0; + + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, + filter_mask, dev); +} + +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo, bool *changed, + struct netlink_ext_ack *extack) +{ + bool curr_change; + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + /* if the MASTER flag is set this will act on the global + * per-VLAN entry as well + */ + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags, + &curr_change, extack); + } else { + vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; + err = br_vlan_add(br, vinfo->vid, vinfo->flags, + &curr_change, extack); + } + if (curr_change) + *changed = true; + break; + + case RTM_DELLINK: + if (p) { + if (!nbp_vlan_delete(p, vinfo->vid)) + *changed = true; + + if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) && + !br_vlan_delete(p->br, vinfo->vid)) + *changed = true; + } else if (!br_vlan_delete(br, vinfo->vid)) { + *changed = true; + } + break; + } + + return err; +} + +int br_process_vlan_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct bridge_vlan_info *vinfo_curr, + struct bridge_vlan_info **vinfo_last, + bool *changed, + struct netlink_ext_ack *extack) +{ + int err, rtm_cmd; + + if (!br_vlan_valid_id(vinfo_curr->vid, extack)) + return -EINVAL; + + /* needed for vlan-only NEWVLAN/DELVLAN notifications */ + rtm_cmd = br_afspec_cmd_to_rtm(cmd); + + if (vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (!br_vlan_valid_range(vinfo_curr, *vinfo_last, extack)) + return -EINVAL; + *vinfo_last = vinfo_curr; + return 0; + } + + if (*vinfo_last) { + struct bridge_vlan_info tmp_vinfo; + int v, v_change_start = 0; + + if (!br_vlan_valid_range(vinfo_curr, *vinfo_last, extack)) + return -EINVAL; + + memcpy(&tmp_vinfo, *vinfo_last, + sizeof(struct bridge_vlan_info)); + for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { + bool curr_change = false; + + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo, &curr_change, + extack); + if (err) + break; + if (curr_change) { + *changed = curr_change; + if (!v_change_start) + v_change_start = v; + } else { + /* nothing to notify yet */ + if (!v_change_start) + continue; + br_vlan_notify(br, p, v_change_start, + v - 1, rtm_cmd); + v_change_start = 0; + } + cond_resched(); + } + /* v_change_start is set only if the last/whole range changed */ + if (v_change_start) + br_vlan_notify(br, p, v_change_start, + v - 1, rtm_cmd); + + *vinfo_last = NULL; + + return err; + } + + err = br_vlan_info(br, p, cmd, vinfo_curr, changed, extack); + if (*changed) + br_vlan_notify(br, p, vinfo_curr->vid, 0, rtm_cmd); + + return err; +} + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd, bool *changed, + struct netlink_ext_ack *extack) +{ + struct bridge_vlan_info *vinfo_curr = NULL; + struct bridge_vlan_info *vinfo_last = NULL; + struct nlattr *attr; + struct vtunnel_info tinfo_last = {}; + struct vtunnel_info tinfo_curr = {}; + int err = 0, rem; + + nla_for_each_nested(attr, af_spec, rem) { + err = 0; + switch (nla_type(attr)) { + case IFLA_BRIDGE_VLAN_TUNNEL_INFO: + if (!p || !(p->flags & BR_VLAN_TUNNEL)) 
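/*
 * Usage sketch for br_process_vlan_info() above, assuming iproute2's
 * range syntax: a request such as "bridge vlan add dev eth0 vid 100-110"
 * arrives as one IFLA_BRIDGE_VLAN_INFO attribute carrying
 * BRIDGE_VLAN_INFO_RANGE_BEGIN (vid 100) followed by one carrying
 * BRIDGE_VLAN_INFO_RANGE_END (vid 110). The kernel then walks every VID
 * in between, calling br_vlan_info() once per VID, and batches the
 * RTM_NEWVLAN notifications so a contiguous run of changed VIDs produces
 * a single notification rather than eleven.
 */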
+ return -EINVAL; + err = br_parse_vlan_tunnel_info(attr, &tinfo_curr); + if (err) + return err; + err = br_process_vlan_tunnel_info(br, p, cmd, + &tinfo_curr, + &tinfo_last, + changed); + if (err) + return err; + break; + case IFLA_BRIDGE_VLAN_INFO: + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo_curr = nla_data(attr); + err = br_process_vlan_info(br, p, cmd, vinfo_curr, + &vinfo_last, changed, + extack); + if (err) + return err; + break; + case IFLA_BRIDGE_MRP: + err = br_mrp_parse(br, p, attr, cmd, extack); + if (err) + return err; + break; + } + } + + return err; +} + +static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, + [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 }, + [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, + [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, + [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, + [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, +}; + +/* Change the state of the port and notify spanning tree */ +static int br_set_port_state(struct net_bridge_port *p, u8 state) +{ + if (state > BR_STATE_BLOCKING) + return -EINVAL; + + /* if kernel STP is running, don't allow changes */ + if (p->br->stp_enabled == BR_KERNEL_STP) + return -EBUSY; + + /* if device is not up, change is not allowed + * if link is not present, only allowable state is disabled + */ + if (!netif_running(p->dev) || + (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) + return -ENETDOWN; + + br_set_state(p, state); + br_port_state_selection(p->br); + return 0; +} + +/* Set/clear or port flags based on attribute */ +static int br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) +{ + unsigned long flags; + int err; + + if (!tb[attrtype]) + return 0; + + if (nla_get_u8(tb[attrtype])) + flags = p->flags | mask; + else + flags = p->flags & ~mask; + + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + + p->flags = flags; + return 0; +} + +/* Process bridge protocol info on port */ +static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) +{ + unsigned long old_flags = p->flags; + bool br_vlan_tunnel_old = false; + int err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, 
IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + if (err) + return err; + + br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false; + err = br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + if (err) + return err; + + if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL)) + nbp_vlan_tunnel_info_flush(p); + + if (tb[IFLA_BRPORT_COST]) { + err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_PRIORITY]) { + err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_STATE]) { + err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_FLUSH]) + br_fdb_delete_by_port(p->br, p, 0, 0); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) { + u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]); + + err = br_multicast_set_port_router(p, mcast_router); + if (err) + return err; + } +#endif + + if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = fwd_mask; + } + + err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, + BR_NEIGH_SUPPRESS); + if (err) + return err; + + err = br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED); + if (err) + return err; + + if (tb[IFLA_BRPORT_BACKUP_PORT]) { + struct net_device *backup_dev = NULL; + u32 backup_ifindex; + + backup_ifindex = nla_get_u32(tb[IFLA_BRPORT_BACKUP_PORT]); + if (backup_ifindex) { + backup_dev = __dev_get_by_index(dev_net(p->dev), + backup_ifindex); + if (!backup_dev) + return -ENOENT; + } + + err = nbp_backup_change(p, backup_dev); + if (err) + return err; + } + + br_port_flags_change(p, old_flags ^ p->flags); + return 0; +} + +/* Change state and parameters on port. 
*/ +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); + struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + struct net_bridge_port *p; + struct nlattr *protinfo; + struct nlattr *afspec; + bool changed = false; + int err = 0; + + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!protinfo && !afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the bridge + */ + if (!p && !afspec) + return -EINVAL; + + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested_deprecated(tb, IFLA_BRPORT_MAX, + protinfo, + br_port_policy, + NULL); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatibility with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } + if (err) + goto out; + changed = true; + } + + if (afspec) + err = br_afspec(br, p, afspec, RTM_SETLINK, &changed, extack); + + if (changed) + br_ifinfo_notify(RTM_NEWLINK, br, p); +out: + return err; +} + +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) +{ + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); + struct net_bridge_port *p; + struct nlattr *afspec; + bool changed = false; + int err = 0; + + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL); + if (changed) + /* Send RTM_NEWLINK because userspace + * expects RTM_NEWLINK for vlan dels + */ + br_ifinfo_notify(RTM_NEWLINK, br, p); + + return err; +} + +static int br_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + return 0; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + if (defpvid >= VLAN_VID_MASK) + return -EINVAL; + } +#endif + + return 0; +} + +static int br_port_slave_changelink(struct net_device *brdev, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(brdev); + int ret; + + if (!data) + return 0; + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; +} + +static int br_port_fill_slave_info(struct sk_buff *skb, + const struct net_device *brdev, + const struct net_device *dev) +{ + return br_port_fill_attrs(skb, br_port_get_rtnl(dev)); +} + +static size_t br_port_get_slave_size(const struct 
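/*
 * Usage sketch for the two write paths above, assuming iproute2: a port
 * flag such as learning can be toggled either via RTM_SETLINK with a
 * nested IFLA_PROTINFO ("bridge link set dev eth0 learning off", handled
 * by br_setlink()) or via the bridge_slave changelink path
 * ("ip link set dev eth0 type bridge_slave learning off", handled by
 * br_port_slave_changelink()). Both funnel into br_setport(), where
 * br_set_port_flag() gives switchdev drivers a chance to veto the change
 * before the software flag is updated.
 */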
net_device *brdev, + const struct net_device *dev) +{ + return br_port_info_size(); +} + +static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { + [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, + [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, + [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, + [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 }, + [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, + [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [IFLA_BR_GROUP_FWD_MASK] = { .type = NLA_U16 }, + [IFLA_BR_GROUP_ADDR] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IFLA_BR_MCAST_ROUTER] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_SNOOPING] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_QUERIER] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_HASH_MAX] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_NF_CALL_IPTABLES] = { .type = NLA_U8 }, + [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 }, + [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, + [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 }, + [IFLA_BR_MULTI_BOOLOPT] = + NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), +}; + +static int br_changelink(struct net_device *brdev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(brdev); + int err; + + if (!data) + return 0; + + if (data[IFLA_BR_FORWARD_DELAY]) { + err = br_set_forward_delay(br, nla_get_u32(data[IFLA_BR_FORWARD_DELAY])); + if (err) + return err; + } + + if (data[IFLA_BR_HELLO_TIME]) { + err = br_set_hello_time(br, nla_get_u32(data[IFLA_BR_HELLO_TIME])); + if (err) + return err; + } + + if (data[IFLA_BR_MAX_AGE]) { + err = br_set_max_age(br, nla_get_u32(data[IFLA_BR_MAX_AGE])); + if (err) + return err; + } + + if (data[IFLA_BR_AGEING_TIME]) { + err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME])); + if (err) + return err; + } + + if (data[IFLA_BR_STP_STATE]) { + u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]); + + err = br_stp_set_enabled(br, stp_enabled, extack); + if (err) + return err; + } + + if (data[IFLA_BR_PRIORITY]) { + u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]); + + br_stp_set_bridge_priority(br, priority); + } + + if (data[IFLA_BR_VLAN_FILTERING]) { + u8 vlan_filter = nla_get_u8(data[IFLA_BR_VLAN_FILTERING]); + + err = __br_vlan_filter_toggle(br, vlan_filter); + if (err) + return err; + } + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + __be16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]); + + err = __br_vlan_set_proto(br, vlan_proto); + if (err) + return err; + } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = 
nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + err = __br_vlan_set_default_pvid(br, defpvid, extack); + if (err) + return err; + } + + if (data[IFLA_BR_VLAN_STATS_ENABLED]) { + __u8 vlan_stats = nla_get_u8(data[IFLA_BR_VLAN_STATS_ENABLED]); + + err = br_vlan_set_stats(br, vlan_stats); + if (err) + return err; + } + + if (data[IFLA_BR_VLAN_STATS_PER_PORT]) { + __u8 per_port = nla_get_u8(data[IFLA_BR_VLAN_STATS_PER_PORT]); + + err = br_vlan_set_stats_per_port(br, per_port); + if (err) + return err; + } +#endif + + if (data[IFLA_BR_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(data[IFLA_BR_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + br->group_fwd_mask = fwd_mask; + } + + if (data[IFLA_BR_GROUP_ADDR]) { + u8 new_addr[ETH_ALEN]; + + if (nla_len(data[IFLA_BR_GROUP_ADDR]) != ETH_ALEN) + return -EINVAL; + memcpy(new_addr, nla_data(data[IFLA_BR_GROUP_ADDR]), ETH_ALEN); + if (!is_link_local_ether_addr(new_addr)) + return -EINVAL; + if (new_addr[5] == 1 || /* 802.3x Pause address */ + new_addr[5] == 2 || /* 802.3ad Slow protocols */ + new_addr[5] == 3) /* 802.1X PAE address */ + return -EINVAL; + spin_lock_bh(&br->lock); + memcpy(br->group_addr, new_addr, sizeof(br->group_addr)); + spin_unlock_bh(&br->lock); + br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true); + br_recalculate_fwd_mask(br); + } + + if (data[IFLA_BR_FDB_FLUSH]) + br_fdb_flush(br); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (data[IFLA_BR_MCAST_ROUTER]) { + u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]); + + err = br_multicast_set_router(br, multicast_router); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_SNOOPING]) { + u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]); + + br_multicast_toggle(br, mcast_snooping); + } + + if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) { + u8 val; + + val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]); + br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val); + } + + if (data[IFLA_BR_MCAST_QUERIER]) { + u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]); + + err = br_multicast_set_querier(br, mcast_querier); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) + br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n", + RHT_ELASTICITY); + + if (data[IFLA_BR_MCAST_HASH_MAX]) + br->hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]); + + if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) { + u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]); + + br->multicast_last_member_count = val; + } + + if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) { + u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]); + + br->multicast_startup_query_count = val; + } + + if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]); + + br->multicast_last_member_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]); + + br->multicast_membership_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERIER_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]); + + br->multicast_querier_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); + + br->multicast_query_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]); + + 
br->multicast_query_response_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); + + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_STATS_ENABLED]) { + __u8 mcast_stats; + + mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]); + br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!mcast_stats); + } + + if (data[IFLA_BR_MCAST_IGMP_VERSION]) { + __u8 igmp_version; + + igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]); + err = br_multicast_set_igmp_version(br, igmp_version); + if (err) + return err; + } + +#if IS_ENABLED(CONFIG_IPV6) + if (data[IFLA_BR_MCAST_MLD_VERSION]) { + __u8 mld_version; + + mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]); + err = br_multicast_set_mld_version(br, mld_version); + if (err) + return err; + } +#endif +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (data[IFLA_BR_NF_CALL_IPTABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]); + + br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val); + } + + if (data[IFLA_BR_NF_CALL_IP6TABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]); + + br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val); + } + + if (data[IFLA_BR_NF_CALL_ARPTABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]); + + br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val); + } +#endif + + if (data[IFLA_BR_MULTI_BOOLOPT]) { + struct br_boolopt_multi *bm; + + bm = nla_data(data[IFLA_BR_MULTI_BOOLOPT]); + err = br_boolopt_multi_toggle(br, bm, extack); + if (err) + return err; + } + + return 0; +} + +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + err = register_netdevice(dev); + if (err) + return err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data, extack); + if (err) + br_dev_delete(dev, NULL); + + return err; +} + +static size_t br_get_size(const struct net_device *brdev) +{ + return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_PER_PORT */ +#endif + nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */ + nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_BRIDGE_ID */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_ROOT_PORT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ + 
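/*
 * Usage sketch for br_dev_newlink()/br_changelink() above, assuming
 * iproute2: "ip link add name br0 type bridge vlan_filtering 1
 * stp_state 1" creates the device and applies both options in a single
 * netlink request. register_netdevice() runs first, br_changelink()
 * then processes the IFLA_BR_* attributes, and br_dev_delete() tears
 * the new bridge down again if any attribute is rejected.
 */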
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ + nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_STATS_ENABLED */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ + nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */ +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */ +#endif + nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */ + 0; +} + +static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) +{ + struct net_bridge *br = netdev_priv(brdev); + u32 forward_delay = jiffies_to_clock_t(br->forward_delay); + u32 hello_time = jiffies_to_clock_t(br->hello_time); + u32 age_time = jiffies_to_clock_t(br->max_age); + u32 ageing_time = jiffies_to_clock_t(br->ageing_time); + u32 stp_enabled = br->stp_enabled; + u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; + u8 vlan_enabled = br_vlan_enabled(br->dev); + struct br_boolopt_multi bm; + u64 clockval; + + clockval = br_timer_value(&br->hello_timer); + if (nla_put_u64_64bit(skb, IFLA_BR_HELLO_TIMER, clockval, IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = br_timer_value(&br->tcn_timer); + if (nla_put_u64_64bit(skb, IFLA_BR_TCN_TIMER, clockval, IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = br_timer_value(&br->topology_change_timer); + if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = br_timer_value(&br->gc_work.timer); + if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) + return -EMSGSIZE; + + br_boolopt_multi_get(br, &bm); + if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || + nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || + nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) || + nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || + nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || + nla_put_u16(skb, IFLA_BR_PRIORITY, priority) || + nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled) || + nla_put_u16(skb, IFLA_BR_GROUP_FWD_MASK, br->group_fwd_mask) || + nla_put(skb, IFLA_BR_BRIDGE_ID, sizeof(struct ifla_bridge_id), + &br->bridge_id) || + nla_put(skb, IFLA_BR_ROOT_ID, sizeof(struct ifla_bridge_id), + &br->designated_root) || + nla_put_u16(skb, IFLA_BR_ROOT_PORT, 
br->root_port) || + nla_put_u32(skb, IFLA_BR_ROOT_PATH_COST, br->root_path_cost) || + nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) || + nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + br->topology_change_detected) || + nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) || + nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm)) + return -EMSGSIZE; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) || + nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) || + nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, + br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || + nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) + return -EMSGSIZE; +#endif +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) || + nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, + br_opt_get(br, BROPT_MULTICAST_ENABLED)) || + nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, + br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) || + nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, + br_opt_get(br, BROPT_MULTICAST_QUERIER)) || + nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED, + br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) || + nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) || + nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || + nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, + br->multicast_last_member_count) || + nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, + br->multicast_startup_query_count) || + nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, + br->multicast_igmp_version)) + return -EMSGSIZE; +#if IS_ENABLED(CONFIG_IPV6) + if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, + br->multicast_mld_version)) + return -EMSGSIZE; +#endif + clockval = jiffies_to_clock_t(br->multicast_last_member_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_membership_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_querier_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_query_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_query_response_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); + if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval, + IFLA_BR_PAD)) + return -EMSGSIZE; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES, + br_opt_get(br, BROPT_NF_CALL_IPTABLES) ? 1 : 0) || + nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES, + br_opt_get(br, BROPT_NF_CALL_IP6TABLES) ? 1 : 0) || + nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES, + br_opt_get(br, BROPT_NF_CALL_ARPTABLES) ? 
1 : 0)) + return -EMSGSIZE; +#endif + + return 0; +} + +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) +{ + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge *br; + int numvls = 0; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + br = netdev_priv(dev); + vg = br_vlan_group(br); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); + break; + default: + return 0; + } + + if (vg) { + /* we need to count all, even placeholder entries */ + list_for_each_entry(v, &vg->vlan_list, vlist) + numvls++; + } + + return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + + nla_total_size_64bit(sizeof(struct br_mcast_stats)) + + (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) + + nla_total_size(0); +} + +static int br_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr) +{ + struct nlattr *nla __maybe_unused; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge *br; + struct nlattr *nest; + int vl_idx = 0; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + br = netdev_priv(dev); + vg = br_vlan_group(br); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); + break; + default: + return -EINVAL; + } + + nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BRIDGE); + if (!nest) + return -EMSGSIZE; + + if (vg) { + u16 pvid; + + pvid = br_get_pvid(vg); + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct bridge_vlan_xstats vxi; + struct br_vlan_stats stats; + + if (++vl_idx < *prividx) + continue; + memset(&vxi, 0, sizeof(vxi)); + vxi.vid = v->vid; + vxi.flags = v->flags; + if (v->vid == pvid) + vxi.flags |= BRIDGE_VLAN_INFO_PVID; + br_vlan_get_stats(v, &stats); + vxi.rx_bytes = stats.rx_bytes; + vxi.rx_packets = stats.rx_packets; + vxi.tx_bytes = stats.tx_bytes; + vxi.tx_packets = stats.tx_packets; + + if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + goto nla_put_failure; + } + } + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (++vl_idx >= *prividx) { + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, + sizeof(struct br_mcast_stats), + BRIDGE_XSTATS_PAD); + if (!nla) + goto nla_put_failure; + br_multicast_get_stats(br, p, nla_data(nla)); + } +#endif + + if (p) { + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_STP, + sizeof(p->stp_xstats), + BRIDGE_XSTATS_PAD); + if (!nla) + goto nla_put_failure; + + spin_lock_bh(&br->lock); + memcpy(nla_data(nla), &p->stp_xstats, sizeof(p->stp_xstats)); + spin_unlock_bh(&br->lock); + } + + nla_nest_end(skb, nest); + *prividx = 0; + + return 0; + +nla_put_failure: + nla_nest_end(skb, nest); + *prividx = vl_idx; + + return -EMSGSIZE; +} + +static struct rtnl_af_ops br_af_ops __read_mostly = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size_filtered, +}; + +struct rtnl_link_ops br_link_ops __read_mostly = { + .kind = "bridge", + .priv_size = sizeof(struct net_bridge), + .setup = br_dev_setup, + .maxtype = IFLA_BR_MAX, + .policy = br_policy, + .validate = br_validate, + .newlink = br_dev_newlink, + .changelink = br_changelink, + .dellink = br_dev_delete, + .get_size = br_get_size, + .fill_info = br_fill_info, + .fill_linkxstats = br_fill_linkxstats, + .get_linkxstats_size = br_get_linkxstats_size, + + .slave_maxtype = IFLA_BRPORT_MAX, + .slave_policy = 
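/*
 * Note on the xstats dump above, assuming iproute2's "ip link xstats
 * type bridge" (or "type bridge_slave" for a port): br_fill_linkxstats()
 * may run out of skb space part-way through, so it tracks its position
 * in vl_idx and stores it in *prividx on failure. The next dump pass
 * skips the per-VLAN entries that were already sent and resumes where
 * the previous message stopped; *prividx is reset to 0 only once
 * everything fit.
 */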
br_port_policy, + .slave_changelink = br_port_slave_changelink, + .get_slave_size = br_port_get_slave_size, + .fill_slave_info = br_port_fill_slave_info, +}; + +int __init br_netlink_init(void) +{ + int err; + + br_mdb_init(); + br_vlan_rtnl_init(); + rtnl_af_register(&br_af_ops); + + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); + br_mdb_uninit(); + return err; +} + +void br_netlink_fini(void) +{ + br_mdb_uninit(); + br_vlan_rtnl_uninit(); + rtnl_af_unregister(&br_af_ops); + rtnl_link_unregister(&br_link_ops); +} diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c new file mode 100644 index 000000000..8914290c7 --- /dev/null +++ b/net/bridge/br_netlink_tunnel.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Bridge per vlan tunnel port dst_metadata netlink control interface + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <uapi/linux/if_bridge.h> +#include <net/dst_metadata.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static size_t __get_vlan_tinfo_size(void) +{ + return nla_total_size(0) + /* nest IFLA_BRIDGE_VLAN_TUNNEL_INFO */ + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_VLAN_TUNNEL_ID */ + nla_total_size(sizeof(u16)) + /* IFLA_BRIDGE_VLAN_TUNNEL_VID */ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ +} + +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last) +{ + __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); + __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); + + return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_last)) == 1; +} + +static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *v, *vtbegin = NULL, *vtend = NULL; + int num_tinfos = 0; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id) + continue; + + if (!vtbegin) { + goto initvars; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunid_inrange(v, vtend)) { + vtend = v; + continue; + } else { + if ((vtend->vid - vtbegin->vid) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } +initvars: + vtbegin = v; + vtend = v; + } + + if (vtbegin && vtend) { + if ((vtend->vid - vtbegin->vid) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } + + return num_tinfos; +} + +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg) +{ + int num_tinfos; + + if (!vg) + return 0; + + rcu_read_lock(); + num_tinfos = __get_num_vlan_tunnel_infos(vg); + rcu_read_unlock(); + + return num_tinfos * __get_vlan_tinfo_size(); +} + +static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, + __be64 tunnel_id, u16 flags) +{ + __be32 tid = tunnel_id_to_key32(tunnel_id); + struct nlattr *tmap; + + tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); + if (!tmap) + return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, + be32_to_cpu(tid))) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_VID, + vid)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, + flags)) + goto nla_put_failure; + nla_nest_end(skb, tmap); + + return 0; + +nla_put_failure: + 
nla_nest_cancel(skb, tmap); + + return -EMSGSIZE; +} + +static int br_fill_vlan_tinfo_range(struct sk_buff *skb, + struct net_bridge_vlan *vtbegin, + struct net_bridge_vlan *vtend) +{ + int err; + + if (vtend && (vtend->vid - vtbegin->vid) > 0) { + /* add range to skb */ + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_BEGIN); + if (err) + return err; + + err = br_fill_vlan_tinfo(skb, vtend->vid, + vtend->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_END); + if (err) + return err; + } else { + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + 0); + if (err) + return err; + } + + return 0; +} + +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vtbegin = NULL; + struct net_bridge_vlan *vtend = NULL; + struct net_bridge_vlan *v; + int err; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + + if (!v->tinfo.tunnel_dst) + continue; + + if (!vtbegin) { + goto initvars; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunid_inrange(v, vtend)) { + vtend = v; + continue; + } else { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } +initvars: + vtbegin = v; + vtend = v; + } + + if (vtbegin) { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } + + return 0; +} + +static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = { + [IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, +}; + +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed) +{ + int err = 0; + + if (!p) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + if (!err) + *changed = true; + break; + case RTM_DELLINK: + if (!nbp_vlan_tunnel_info_delete(p, vid)) + *changed = true; + break; + } + + return err; +} + +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo) +{ + struct nlattr *tb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1]; + u32 tun_id; + u16 vid, flags = 0; + int err; + + memset(tinfo, 0, sizeof(*tinfo)); + + err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + attr, vlan_tunnel_policy, NULL); + if (err < 0) + return err; + + if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || + !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) + return -EINVAL; + + tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); + vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); + if (vid >= VLAN_VID_MASK) + return -ERANGE; + + if (tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]) + flags = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]); + + tinfo->tunid = tun_id; + tinfo->vid = vid; + tinfo->flags = flags; + + return 0; +} + +/* send a notification if v_curr can't enter the range and start a new one */ +static void __vlan_tunnel_handle_range(const struct net_bridge_port *p, + struct net_bridge_vlan **v_start, + struct net_bridge_vlan **v_end, + int v_curr, bool curr_change) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + vg = nbp_vlan_group(p); + if (!vg) + return; + + v = br_vlan_find(vg, v_curr); + + if (!*v_start) + goto out_init; + + if (v && curr_change && br_vlan_can_enter_range(v, *v_end)) { + *v_end = v; + return; + } + + br_vlan_notify(p->br, p, (*v_start)->vid, 
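/*
 * Usage sketch for the per-VLAN tunnel mapping above, assuming a VXLAN
 * device in collect_md (external) mode enslaved to the bridge:
 * "bridge vlan add dev vxlan0 vid 10 tunnel_info id 1010" carries an
 * IFLA_BRIDGE_VLAN_TUNNEL_INFO nest mapping VLAN 10 to tunnel key 1010,
 * which br_parse_vlan_tunnel_info() turns into a vtunnel_info and
 * nbp_vlan_tunnel_info_add() attaches to the per-port VLAN as
 * dst_metadata for egress encapsulation.
 */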
(*v_end)->vid, RTM_NEWVLAN); +out_init: + /* we start a range only if there are any changes to notify about */ + *v_start = curr_change ? v : NULL; + *v_end = *v_start; +} + +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last, + bool *changed) +{ + int err; + + if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + return -EINVAL; + memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info)); + } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) { + struct net_bridge_vlan *v_start = NULL, *v_end = NULL; + int t, v; + + if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)) + return -EINVAL; + if ((tinfo_curr->vid - tinfo_last->vid) != + (tinfo_curr->tunid - tinfo_last->tunid)) + return -EINVAL; + t = tinfo_last->tunid; + for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { + bool curr_change = false; + + err = br_vlan_tunnel_info(p, cmd, v, t, &curr_change); + if (err) + break; + t++; + + if (curr_change) + *changed = curr_change; + __vlan_tunnel_handle_range(p, &v_start, &v_end, v, + curr_change); + } + if (v_start && v_end) + br_vlan_notify(br, p, v_start->vid, v_end->vid, + RTM_NEWVLAN); + if (err) + return err; + + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } else { + if (tinfo_last->flags) + return -EINVAL; + err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, + tinfo_curr->tunid, changed); + if (err) + return err; + br_vlan_notify(br, p, tinfo_curr->vid, 0, RTM_NEWVLAN); + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } + + return 0; +} diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c new file mode 100644 index 000000000..8c69f0c95 --- /dev/null +++ b/net/bridge/br_nf_core.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handle firewalling core + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/in_route.h> +#include <linux/inetdevice.h> +#include <net/route.h> + +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) +{ +} + +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) +{ +} + +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return NULL; +} + +static unsigned int fake_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .update_pmtu = fake_update_pmtu, + .redirect = fake_redirect, + .cow_metrics = fake_cow_metrics, + .neigh_lookup = fake_neigh_lookup, + .mtu = fake_mtu, +}; + +/* + * Initialize bogus route table used to keep netfilter happy. + * Currently, we fill in the PMTU entry because netfilter + * refragmentation needs it, and the rt_flags entry because + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. 
+ */ +static const u32 br_dst_default_metrics[RTAX_MAX] = { + [RTAX_MTU - 1] = 1500, +}; + +void br_netfilter_rtable_init(struct net_bridge *br) +{ + struct rtable *rt = &br->fake_rtable; + + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; + rt->dst.ops = &fake_dst_ops; +} + +int __init br_nf_core_init(void) +{ + return dst_entries_init(&fake_dst_ops); +} + +void br_nf_core_fini(void) +{ + dst_entries_destroy(&fake_dst_ops); +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h new file mode 100644 index 000000000..2b88b17cc --- /dev/null +++ b/net/bridge/br_private.h @@ -0,0 +1,1571 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#ifndef _BR_PRIVATE_H +#define _BR_PRIVATE_H + +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/netpoll.h> +#include <linux/u64_stats_sync.h> +#include <net/route.h> +#include <net/ip6_fib.h> +#include <linux/if_vlan.h> +#include <linux/rhashtable.h> +#include <linux/refcount.h> + +#define BR_HASH_BITS 8 +#define BR_HASH_SIZE (1 << BR_HASH_BITS) + +#define BR_HOLD_TIME (1*HZ) + +#define BR_PORT_BITS 10 +#define BR_MAX_PORTS (1<<BR_PORT_BITS) + +#define BR_MULTICAST_DEFAULT_HASH_MAX 4096 + +#define BR_VERSION "2.3" + +/* Control of forwarding link local multicast */ +#define BR_GROUPFWD_DEFAULT 0 +/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ +enum { + BR_GROUPFWD_STP = BIT(0), + BR_GROUPFWD_MACPAUSE = BIT(1), + BR_GROUPFWD_LACP = BIT(2), +}; + +#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \ + BR_GROUPFWD_LACP) +/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ +#define BR_GROUPFWD_8021AD 0xB801u + +/* Path to usermode spanning tree program */ +#define BR_STP_PROG "/sbin/bridge-stp" + +#define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT) + +typedef struct bridge_id bridge_id; +typedef struct mac_addr mac_addr; +typedef __u16 port_id; + +struct bridge_id { + unsigned char prio[2]; + unsigned char addr[ETH_ALEN]; +}; + +struct mac_addr { + unsigned char addr[ETH_ALEN]; +}; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* our own querier */ +struct bridge_mcast_own_query { + struct timer_list timer; + u32 startup_sent; +}; + +/* other querier */ +struct bridge_mcast_other_query { + struct timer_list timer; + unsigned long delay_time; +}; + +/* selected querier */ +struct bridge_mcast_querier { + struct br_ip addr; + struct net_bridge_port __rcu *port; +}; + +/* IGMP/MLD statistics */ +struct bridge_mcast_stats { + struct br_mcast_stats mstats; + struct u64_stats_sync syncp; +}; +#endif + +struct br_vlan_stats { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; + struct u64_stats_sync syncp; +}; + +struct br_tunnel_info { + __be64 tunnel_id; + struct metadata_dst __rcu *tunnel_dst; +}; + +/* private vlan flags */ +enum { + BR_VLFLAG_PER_PORT_STATS = BIT(0), + BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), +}; + +/** + * struct net_bridge_vlan - per-vlan entry + * + * @vnode: rhashtable member + * @vid: VLAN id + * @flags: bridge vlan flags + * @priv_flags: private (in-kernel) bridge vlan flags + * @state: STP state (e.g. 
blocking, learning, forwarding) + * @stats: per-cpu VLAN statistics + * @br: if MASTER flag set, this points to a bridge struct + * @port: if MASTER flag unset, this points to a port struct + * @refcnt: if MASTER flag set, this is bumped for each port referencing it + * @brvlan: if MASTER flag unset, this points to the global per-VLAN context + * for this VLAN entry + * @vlist: sorted list of VLAN entries + * @rcu: used for entry destruction + * + * This structure is shared between the global per-VLAN entries contained in + * the bridge rhashtable and the local per-port per-VLAN entries contained in + * the port's rhashtable. The union entries should be interpreted depending on + * the entry flags that are set. + */ +struct net_bridge_vlan { + struct rhash_head vnode; + struct rhash_head tnode; + u16 vid; + u16 flags; + u16 priv_flags; + u8 state; + struct br_vlan_stats __percpu *stats; + union { + struct net_bridge *br; + struct net_bridge_port *port; + }; + union { + refcount_t refcnt; + struct net_bridge_vlan *brvlan; + }; + + struct br_tunnel_info tinfo; + + struct list_head vlist; + + struct rcu_head rcu; +}; + +/** + * struct net_bridge_vlan_group + * + * @vlan_hash: VLAN entry rhashtable + * @vlan_list: sorted VLAN entry list + * @num_vlans: number of total VLAN entries + * @pvid: PVID VLAN id + * @pvid_state: PVID's STP state (e.g. forwarding, learning, blocking) + * + * IMPORTANT: Be careful when checking if there're VLAN entries using list + * primitives because the bridge can have entries in its list which + * are just for global context but not for filtering, i.e. they have + * the master flag set but not the brentry flag. If you have to check + * if there're "real" entries in the bridge please test @num_vlans + */ +struct net_bridge_vlan_group { + struct rhashtable vlan_hash; + struct rhashtable tunnel_hash; + struct list_head vlan_list; + u16 num_vlans; + u16 pvid; + u8 pvid_state; +}; + +/* bridge fdb flags */ +enum { + BR_FDB_LOCAL, + BR_FDB_STATIC, + BR_FDB_STICKY, + BR_FDB_ADDED_BY_USER, + BR_FDB_ADDED_BY_EXT_LEARN, + BR_FDB_OFFLOADED, + BR_FDB_NOTIFY, + BR_FDB_NOTIFY_INACTIVE +}; + +struct net_bridge_fdb_key { + mac_addr addr; + u16 vlan_id; +}; + +struct net_bridge_fdb_entry { + struct rhash_head rhnode; + struct net_bridge_port *dst; + + struct net_bridge_fdb_key key; + struct hlist_node fdb_node; + unsigned long flags; + + /* write-heavy members should not affect lookups */ + unsigned long updated ____cacheline_aligned_in_smp; + unsigned long used; + + struct rcu_head rcu; +}; + +#define MDB_PG_FLAGS_PERMANENT BIT(0) +#define MDB_PG_FLAGS_OFFLOAD BIT(1) +#define MDB_PG_FLAGS_FAST_LEAVE BIT(2) +#define MDB_PG_FLAGS_STAR_EXCL BIT(3) +#define MDB_PG_FLAGS_BLOCKED BIT(4) + +#define PG_SRC_ENT_LIMIT 32 + +#define BR_SGRP_F_DELETE BIT(0) +#define BR_SGRP_F_SEND BIT(1) +#define BR_SGRP_F_INSTALLED BIT(2) + +struct net_bridge_mcast_gc { + struct hlist_node gc_node; + void (*destroy)(struct net_bridge_mcast_gc *gc); +}; + +struct net_bridge_group_src { + struct hlist_node node; + + struct br_ip addr; + struct net_bridge_port_group *pg; + u8 flags; + u8 src_query_rexmit_cnt; + struct timer_list timer; + + struct net_bridge *br; + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; +}; + +struct net_bridge_port_group_sg_key { + struct net_bridge_port *port; + struct br_ip addr; +}; + +struct net_bridge_port_group { + struct net_bridge_port_group __rcu *next; + struct net_bridge_port_group_sg_key key; + unsigned char eth_addr[ETH_ALEN] __aligned(2); + unsigned char flags; 
+ unsigned char filter_mode; + unsigned char grp_query_rexmit_cnt; + unsigned char rt_protocol; + + struct hlist_head src_list; + unsigned int src_ents; + struct timer_list timer; + struct timer_list rexmit_timer; + struct hlist_node mglist; + + struct rhash_head rhnode; + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; +}; + +struct net_bridge_mdb_entry { + struct rhash_head rhnode; + struct net_bridge *br; + struct net_bridge_port_group __rcu *ports; + struct br_ip addr; + bool host_joined; + + struct timer_list timer; + struct hlist_node mdb_node; + + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; +}; + +struct net_bridge_port { + struct net_bridge *br; + struct net_device *dev; + struct list_head list; + + unsigned long flags; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + struct net_bridge_port __rcu *backup_port; + + /* STP */ + u8 priority; + u8 state; + u16 port_no; + unsigned char topology_change_ack; + unsigned char config_pending; + port_id port_id; + port_id designated_port; + bridge_id designated_root; + bridge_id designated_bridge; + u32 path_cost; + u32 designated_cost; + unsigned long designated_age; + + struct timer_list forward_delay_timer; + struct timer_list hold_timer; + struct timer_list message_age_timer; + struct kobject kobj; + struct rcu_head rcu; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct bridge_mcast_own_query ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query ip6_own_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + unsigned char multicast_router; + struct bridge_mcast_stats __percpu *mcast_stats; + struct timer_list multicast_router_timer; + struct hlist_head mglist; + struct hlist_node rlist; +#endif + +#ifdef CONFIG_SYSFS + char sysfs_name[IFNAMSIZ]; +#endif + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; +#endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + u16 group_fwd_mask; + u16 backup_redirected_cnt; + + struct bridge_stp_xstats stp_xstats; +}; + +#define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj) + +#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) +#define br_promisc_port(p) ((p)->flags & BR_PROMISC) + +static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + +static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev) +{ + return netif_is_bridge_port(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + +static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev) +{ + return netif_is_bridge_port(dev) ? 
+ rcu_dereference_rtnl(dev->rx_handler_data) : NULL; +} + +enum net_bridge_opts { + BROPT_VLAN_ENABLED, + BROPT_VLAN_STATS_ENABLED, + BROPT_NF_CALL_IPTABLES, + BROPT_NF_CALL_IP6TABLES, + BROPT_NF_CALL_ARPTABLES, + BROPT_GROUP_ADDR_SET, + BROPT_MULTICAST_ENABLED, + BROPT_MULTICAST_QUERIER, + BROPT_MULTICAST_QUERY_USE_IFADDR, + BROPT_MULTICAST_STATS_ENABLED, + BROPT_HAS_IPV6_ADDR, + BROPT_NEIGH_SUPPRESS_ENABLED, + BROPT_MTU_SET_BY_USER, + BROPT_VLAN_STATS_PER_PORT, + BROPT_NO_LL_LEARN, + BROPT_VLAN_BRIDGE_BINDING, +}; + +struct net_bridge { + spinlock_t lock; + spinlock_t hash_lock; + struct list_head port_list; + struct net_device *dev; + struct pcpu_sw_netstats __percpu *stats; + unsigned long options; + /* These fields are accessed on each packet */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + __be16 vlan_proto; + u16 default_pvid; + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + + struct rhashtable fdb_hash_tbl; +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + union { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; +#endif + u16 group_fwd_mask; + u16 group_fwd_mask_required; + + /* STP */ + bridge_id designated_root; + bridge_id bridge_id; + unsigned char topology_change; + unsigned char topology_change_detected; + u16 root_port; + unsigned long max_age; + unsigned long hello_time; + unsigned long forward_delay; + unsigned long ageing_time; + unsigned long bridge_max_age; + unsigned long bridge_hello_time; + unsigned long bridge_forward_delay; + unsigned long bridge_ageing_time; + u32 root_path_cost; + + u8 group_addr[ETH_ALEN]; + + enum { + BR_NO_STP, /* no spanning tree */ + BR_KERNEL_STP, /* old STP in kernel */ + BR_USER_STP, /* new RSTP in userspace */ + } stp_enabled; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + u32 hash_max; + + u32 multicast_last_member_count; + u32 multicast_startup_query_count; + + u8 multicast_igmp_version; + u8 multicast_router; +#if IS_ENABLED(CONFIG_IPV6) + u8 multicast_mld_version; +#endif + spinlock_t multicast_lock; + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + unsigned long multicast_startup_query_interval; + + struct rhashtable mdb_hash_tbl; + struct rhashtable sg_port_tbl; + + struct hlist_head mcast_gc_list; + struct hlist_head mdb_list; + struct hlist_head router_list; + + struct timer_list multicast_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; + struct bridge_mcast_querier ip4_querier; + struct bridge_mcast_stats __percpu *mcast_stats; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; + struct bridge_mcast_querier ip6_querier; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + struct work_struct mcast_gc_work; +#endif + + struct timer_list hello_timer; + struct timer_list tcn_timer; + struct timer_list topology_change_timer; + struct delayed_work gc_work; + struct kobject *ifobj; + u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + struct hlist_head fdb_list; + +#if IS_ENABLED(CONFIG_BRIDGE_MRP) + struct list_head mrp_list; +#endif +}; + +struct br_input_skb_cb { + struct net_device *brdev; + + u16 frag_max_size; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + u8 igmp; + u8 mrouters_only:1; +#endif + u8 proxyarp_replied:1; + u8 src_port_isolated:1; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_filtered:1; 
+#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + u8 br_netfilter_broute:1; +#endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif +}; + +#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + +#define br_printk(level, br, format, args...) \ + printk(level "%s: " format, (br)->dev->name, ##args) + +#define br_err(__br, format, args...) \ + br_printk(KERN_ERR, __br, format, ##args) +#define br_warn(__br, format, args...) \ + br_printk(KERN_WARNING, __br, format, ##args) +#define br_notice(__br, format, args...) \ + br_printk(KERN_NOTICE, __br, format, ##args) +#define br_info(__br, format, args...) \ + br_printk(KERN_INFO, __br, format, ##args) + +#define br_debug(br, format, args...) \ + pr_debug("%s: " format, (br)->dev->name, ##args) + +/* called under bridge lock */ +static inline int br_is_root_bridge(const struct net_bridge *br) +{ + return !memcmp(&br->bridge_id, &br->designated_root, 8); +} + +/* check if a VLAN entry is global */ +static inline bool br_vlan_is_master(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_MASTER; +} + +/* check if a VLAN entry is used by the bridge */ +static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_BRENTRY; +} + +/* check if we should use the vlan entry, returns false if it's only context */ +static inline bool br_vlan_should_use(const struct net_bridge_vlan *v) +{ + if (br_vlan_is_master(v)) { + if (br_vlan_is_brentry(v)) + return true; + else + return false; + } + + return true; +} + +static inline bool nbp_state_should_learn(const struct net_bridge_port *p) +{ + return p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING; +} + +static inline bool br_vlan_valid_id(u16 vid, struct netlink_ext_ack *extack) +{ + bool ret = vid > 0 && vid < VLAN_VID_MASK; + + if (!ret) + NL_SET_ERR_MSG_MOD(extack, "Vlan id is invalid"); + + return ret; +} + +static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur, + const struct bridge_vlan_info *last, + struct netlink_ext_ack *extack) +{ + /* pvid flag is not allowed in ranges */ + if (cur->flags & BRIDGE_VLAN_INFO_PVID) { + NL_SET_ERR_MSG_MOD(extack, "Pvid isn't allowed in a range"); + return false; + } + + /* when cur is the range end, check if: + * - it has range start flag + * - range ids are invalid (end is equal to or before start) + */ + if (last) { + if (cur->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + NL_SET_ERR_MSG_MOD(extack, "Found a new vlan range start while processing one"); + return false; + } else if (!(cur->flags & BRIDGE_VLAN_INFO_RANGE_END)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan range end flag is missing"); + return false; + } else if (cur->vid <= last->vid) { + NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id"); + return false; + } + } + + /* check for required range flags */ + if (!(cur->flags & (BRIDGE_VLAN_INFO_RANGE_BEGIN | + BRIDGE_VLAN_INFO_RANGE_END))) { + NL_SET_ERR_MSG_MOD(extack, "Both vlan range flags are missing"); + return false; + } + + return true; +} + +static inline int br_afspec_cmd_to_rtm(int cmd) +{ + switch (cmd) { + case RTM_SETLINK: + return RTM_NEWVLAN; + case RTM_DELLINK: + return RTM_DELVLAN; + } + + return 0; +} + +static inline int br_opt_get(const struct net_bridge *br, + enum net_bridge_opts opt) +{ + return 
test_bit(opt, &br->options); +} + +int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, + struct netlink_ext_ack *extack); +int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt); +int br_boolopt_multi_toggle(struct net_bridge *br, + struct br_boolopt_multi *bm, + struct netlink_ext_ack *extack); +void br_boolopt_multi_get(const struct net_bridge *br, + struct br_boolopt_multi *bm); +void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on); + +/* br_device.c */ +void br_dev_setup(struct net_device *dev); +void br_dev_delete(struct net_device *dev, struct list_head *list); +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +#ifdef CONFIG_NET_POLL_CONTROLLER +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + netpoll_send_skb(p->np, skb); +} + +int br_netpoll_enable(struct net_bridge_port *p); +void br_netpoll_disable(struct net_bridge_port *p); +#else +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline int br_netpoll_enable(struct net_bridge_port *p) +{ + return 0; +} + +static inline void br_netpoll_disable(struct net_bridge_port *p) +{ +} +#endif + +/* br_fdb.c */ +int br_fdb_init(void); +void br_fdb_fini(void); +int br_fdb_hash_init(struct net_bridge *br); +void br_fdb_hash_fini(struct net_bridge *br); +void br_fdb_flush(struct net_bridge *br); +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); +void br_fdb_cleanup(struct work_struct *work); +void br_fdb_delete_by_port(struct net_bridge *br, + const struct net_bridge_port *p, u16 vid, int do_all); +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid); +int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); +int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, + unsigned long off); +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid); +void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, unsigned long flags); + +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, u16 vid); +int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack); +int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, struct net_device *fdev, int *idx); +int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, + const unsigned char *addr, u16 vid, u32 portid, u32 seq, + struct netlink_ext_ack *extack); +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid, + bool swdev_notify); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid, + bool swdev_notify); +void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, + const 
unsigned char *addr, u16 vid, bool offloaded); + +/* br_forward.c */ +enum br_pkt_type { + BR_PKT_UNICAST, + BR_PKT_MULTICAST, + BR_PKT_BROADCAST +}; +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, + bool local_rcv, bool local_orig); +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); +void br_flood(struct net_bridge *br, struct sk_buff *skb, + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig); + +/* return true if both source port and dest port are isolated */ +static inline bool br_skb_isolated(const struct net_bridge_port *to, + const struct sk_buff *skb) +{ + return BR_INPUT_SKB_CB(skb)->src_port_isolated && + (to->flags & BR_ISOLATED); +} + +/* br_if.c */ +void br_port_carrier_check(struct net_bridge_port *p, bool *notified); +int br_add_bridge(struct net *net, const char *name); +int br_del_bridge(struct net *net, const char *name); +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack); +int br_del_if(struct net_bridge *br, struct net_device *dev); +void br_mtu_auto_adjust(struct net_bridge *br); +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features); +void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); +void br_manage_promisc(struct net_bridge *br); +int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev); + +/* br_input.c */ +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); +rx_handler_func_t *br_get_rx_handler(const struct net_device *dev); + +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev); +} + +static inline bool br_rx_handler_check_rtnl(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->rx_handler) == br_get_rx_handler(dev); +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + +static inline struct net_bridge_port * +br_port_get_check_rtnl(const struct net_device *dev) +{ + return br_rx_handler_check_rtnl(dev) ? 
br_port_get_rtnl_rcu(dev) : NULL; +} + +/* br_ioctl.c */ +int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, + void __user *arg); + +/* br_multicast.c */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb, u16 vid); +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid); +int br_multicast_add_port(struct net_bridge_port *port); +void br_multicast_del_port(struct net_bridge_port *port); +void br_multicast_enable_port(struct net_bridge_port *port); +void br_multicast_disable_port(struct net_bridge_port *port); +void br_multicast_init(struct net_bridge *br); +void br_multicast_join_snoopers(struct net_bridge *br); +void br_multicast_leave_snoopers(struct net_bridge *br); +void br_multicast_open(struct net_bridge *br); +void br_multicast_stop(struct net_bridge *br); +void br_multicast_dev_del(struct net_bridge *br); +void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, bool local_rcv, bool local_orig); +int br_multicast_set_router(struct net_bridge *br, unsigned long val); +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); +int br_multicast_toggle(struct net_bridge *br, unsigned long val); +int br_multicast_set_querier(struct net_bridge *br, unsigned long val); +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val); +#if IS_ENABLED(CONFIG_IPV6) +int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val); +#endif +struct net_bridge_mdb_entry * +br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst); +struct net_bridge_mdb_entry * +br_multicast_new_group(struct net_bridge *br, struct br_ip *group); +struct net_bridge_port_group * +br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, + struct net_bridge_port_group __rcu *next, + unsigned char flags, const unsigned char *src, + u8 filter_mode, u8 rt_protocol); +int br_mdb_hash_init(struct net_bridge *br); +void br_mdb_hash_fini(struct net_bridge *br); +void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, int type); +void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, + int type); +void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + struct net_bridge_port_group __rcu **pp); +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + const struct sk_buff *skb, u8 type, u8 dir); +int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_uninit_stats(struct net_bridge *br); +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest); +void br_mdb_init(void); +void br_mdb_uninit(void); +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, + u8 filter_mode); +void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, + struct net_bridge_port_group *sg); + +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return 
br->multicast_router == 2 || + (br->multicast_router == 1 && + timer_pending(&br->multicast_router_timer)); +} + +static inline bool +__br_multicast_querier_exists(struct net_bridge *br, + struct bridge_mcast_other_query *querier, + const bool is_ipv6) +{ + bool own_querier_enabled; + + if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { + if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR)) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + + return time_is_before_jiffies(querier->delay_time) && + (own_querier_enabled || timer_pending(&querier->timer)); +} + +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) +{ + switch (eth->h_proto) { + case (htons(ETH_P_IP)): + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); +#if IS_ENABLED(CONFIG_IPV6) + case (htons(ETH_P_IPV6)): + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); +#endif + default: + return false; + } +} + +static inline bool br_multicast_is_star_g(const struct br_ip *ip) +{ + switch (ip->proto) { + case htons(ETH_P_IP): + return ipv4_is_zeronet(ip->src.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return ipv6_addr_any(&ip->src.ip6); +#endif + default: + return false; + } +} + +static inline bool br_multicast_should_handle_mode(const struct net_bridge *br, + __be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + return !!(br->multicast_igmp_version == 3); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return !!(br->multicast_mld_version == 2); +#endif + default: + return false; + } +} + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return BR_INPUT_SKB_CB(skb)->igmp; +} + +static inline unsigned long br_multicast_lmqt(const struct net_bridge *br) +{ + return br->multicast_last_member_interval * + br->multicast_last_member_count; +} + +static inline unsigned long br_multicast_gmi(const struct net_bridge *br) +{ + return br->multicast_membership_interval; +} +#else +static inline int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + return 0; +} + +static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid) +{ + return NULL; +} + +static inline int br_multicast_add_port(struct net_bridge_port *port) +{ + return 0; +} + +static inline void br_multicast_del_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_enable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_disable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_init(struct net_bridge *br) +{ +} + +static inline void br_multicast_join_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_leave_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_open(struct net_bridge *br) +{ +} + +static inline void br_multicast_stop(struct net_bridge *br) +{ +} + +static inline void br_multicast_dev_del(struct net_bridge *br) +{ +} + +static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, + bool local_rcv, bool local_orig) +{ +} + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return false; +} + +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) +{ + return false; +} + +static inline void br_mdb_init(void) +{ +} + +static inline void br_mdb_uninit(void) +{ +} + 
+static inline int br_mdb_hash_init(struct net_bridge *br) +{ + return 0; +} + +static inline void br_mdb_hash_fini(struct net_bridge *br) +{ +} + +static inline void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, + const struct sk_buff *skb, + u8 type, u8 dir) +{ +} + +static inline int br_multicast_init_stats(struct net_bridge *br) +{ + return 0; +} + +static inline void br_multicast_uninit_stats(struct net_bridge *br) +{ +} + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return 0; +} +#endif + +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid, u8 *state); +bool br_allowed_egress(struct net_bridge_vlan_group *vg, + const struct sk_buff *skb); +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, + struct net_bridge_vlan_group *vg, + struct sk_buff *skb); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed, struct netlink_ext_ack *extack); +int br_vlan_delete(struct net_bridge *br, u16 vid); +void br_vlan_flush(struct net_bridge *br); +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); +void br_recalculate_fwd_mask(struct net_bridge *br); +int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); +int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +int br_vlan_set_stats(struct net_bridge *br, unsigned long val); +int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val); +int br_vlan_init(struct net_bridge *br); +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); +int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid, + struct netlink_ext_ack *extack); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed, struct netlink_ext_ack *extack); +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +void nbp_vlan_flush(struct net_bridge_port *port); +int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); +int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats); +void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, + void *ptr); +void br_vlan_rtnl_init(void); +void br_vlan_rtnl_uninit(void); +void br_vlan_notify(const struct net_bridge *br, + const struct net_bridge_port *p, + u16 vid, u16 vid_range, + int cmd); +bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end); + +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) +{ + return rtnl_dereference(br->vlgrp); +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) +{ + return rtnl_dereference(p->vlgrp); +} + +static inline struct net_bridge_vlan_group *br_vlan_group_rcu( + const struct net_bridge *br) +{ + return rcu_dereference(br->vlgrp); +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( + const struct net_bridge_port *p) +{ + return rcu_dereference(p->vlgrp); +} + +/* Since bridge 
now depends on 8021Q module, but the time bridge sees the + * skb, the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (skb_vlan_tag_present(skb)) { + *vid = skb_vlan_tag_get_id(skb); + } else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) +{ + if (!vg) + return 0; + + smp_rmb(); + return vg->pvid; +} + +static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) +{ + return v->vid == pvid ? v->flags | BRIDGE_VLAN_INFO_PVID : v->flags; +} +#else +static inline bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, + struct sk_buff *skb, + u16 *vid, u8 *state) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg, + const struct sk_buff *skb) +{ + return true; +} + +static inline bool br_should_learn(struct net_bridge_port *p, + struct sk_buff *skb, u16 *vid) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, + struct net_bridge_vlan_group *vg, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed, struct netlink_ext_ack *extack) +{ + *changed = false; + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline void br_recalculate_fwd_mask(struct net_bridge *br) +{ +} + +static inline int br_vlan_init(struct net_bridge *br) +{ + return 0; +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed, struct netlink_ext_ack *extack) +{ + *changed = false; + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, + u16 vid) +{ + return NULL; +} + +static inline int nbp_vlan_init(struct net_bridge_port *port, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} + +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) +{ + return 0; +} + +static inline int __br_vlan_filter_toggle(struct net_bridge *br, + unsigned long val) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p, + u32 filter_mask) +{ + return 0; +} + +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *br_vlan_group_rcu( + const struct net_bridge *br) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ +} + +static inline void br_vlan_port_event(struct net_bridge_port *p, + unsigned long event) +{ +} + +static inline int br_vlan_bridge_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + return 0; +} + +static 
inline void br_vlan_rtnl_init(void) +{ +} + +static inline void br_vlan_rtnl_uninit(void) +{ +} + +static inline void br_vlan_notify(const struct net_bridge *br, + const struct net_bridge_port *p, + u16 vid, u16 vid_range, + int cmd) +{ +} + +static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return true; +} +#endif + +/* br_vlan_options.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end); +bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v); +size_t br_vlan_opts_nl_size(void); +int br_vlan_process_options(const struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_vlan *range_start, + struct net_bridge_vlan *range_end, + struct nlattr **tb, + struct netlink_ext_ack *extack); + +/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ +static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) +{ + return READ_ONCE(v->state); +} + +static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state) +{ + WRITE_ONCE(v->state, state); +} + +static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg) +{ + return READ_ONCE(vg->pvid_state); +} + +static inline void br_vlan_set_pvid_state(struct net_bridge_vlan_group *vg, + u8 state) +{ + WRITE_ONCE(vg->pvid_state, state); +} + +/* learn_allow is true at ingress and false at egress */ +static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) +{ + switch (state) { + case BR_STATE_LEARNING: + return learn_allow; + case BR_STATE_FORWARDING: + return true; + default: + return false; + } +} +#endif + +struct nf_br_ops { + int (*br_dev_xmit_hook)(struct sk_buff *skb); +}; +extern const struct nf_br_ops __rcu *nf_br_ops; + +/* br_netfilter.c */ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +int br_nf_core_init(void); +void br_nf_core_fini(void); +void br_netfilter_rtable_init(struct net_bridge *); +#else +static inline int br_nf_core_init(void) { return 0; } +static inline void br_nf_core_fini(void) {} +#define br_netfilter_rtable_init(x) +#endif + +/* br_stp.c */ +void br_set_state(struct net_bridge_port *p, unsigned int state); +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); +void br_init_port(struct net_bridge_port *p); +void br_become_designated_port(struct net_bridge_port *p); + +void __br_set_forward_delay(struct net_bridge *br, unsigned long t); +int br_set_forward_delay(struct net_bridge *br, unsigned long x); +int br_set_hello_time(struct net_bridge *br, unsigned long x); +int br_set_max_age(struct net_bridge *br, unsigned long x); +int __set_ageing_time(struct net_device *dev, unsigned long t); +int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time); + + +/* br_stp_if.c */ +void br_stp_enable_bridge(struct net_bridge *br); +void br_stp_disable_bridge(struct net_bridge *br); +int br_stp_set_enabled(struct net_bridge *br, unsigned long val, + struct netlink_ext_ack *extack); +void br_stp_enable_port(struct net_bridge_port *p); +void br_stp_disable_port(struct net_bridge_port *p); +bool br_stp_recalculate_bridge_id(struct net_bridge *br); +void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio); +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned 
long path_cost); +ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); + +/* br_stp_bpdu.c */ +struct stp_proto; +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); + +/* br_stp_timer.c */ +void br_stp_timer_init(struct net_bridge *br); +void br_stp_port_timer_init(struct net_bridge_port *p); +unsigned long br_timer_value(const struct timer_list *timer); + +/* br.c */ +#if IS_ENABLED(CONFIG_ATM_LANE) +extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr); +#endif + +/* br_mrp.c */ +#if IS_ENABLED(CONFIG_BRIDGE_MRP) +int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, + struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); +int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb); +bool br_mrp_enabled(struct net_bridge *br); +void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p); +int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br); +#else +static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, + struct nlattr *attr, int cmd, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) +{ + return 0; +} + +static inline bool br_mrp_enabled(struct net_bridge *br) +{ + return false; +} + +static inline void br_mrp_port_del(struct net_bridge *br, + struct net_bridge_port *p) +{ +} + +static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) +{ + return 0; +} + +#endif + +/* br_netlink.c */ +extern struct rtnl_link_ops br_link_ops; +int br_netlink_init(void); +void br_netlink_fini(void); +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags, + struct netlink_ext_ack *extack); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, + u32 filter_mask, int nlflags); +int br_process_vlan_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct bridge_vlan_info *vinfo_curr, + struct bridge_vlan_info **vinfo_last, + bool *changed, + struct netlink_ext_ack *extack); + +#ifdef CONFIG_SYSFS +/* br_sysfs_if.c */ +extern const struct sysfs_ops brport_sysfs_ops; +int br_sysfs_addif(struct net_bridge_port *p); +int br_sysfs_renameif(struct net_bridge_port *p); + +/* br_sysfs_br.c */ +int br_sysfs_addbr(struct net_device *dev); +void br_sysfs_delbr(struct net_device *dev); + +#else + +static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } +static inline void br_sysfs_delbr(struct net_device *dev) { return; } +#endif /* CONFIG_SYSFS */ + +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask); +void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, + int type); +int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, + struct netlink_ext_ack *extack); +int 
br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); + +static inline void br_switchdev_frame_unmark(struct sk_buff *skb) +{ + skb->offload_fwd_mark = 0; +} +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} + +static inline int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + return 0; +} + +static inline int br_switchdev_port_vlan_add(struct net_device *dev, + u16 vid, u16 flags, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ +} + +static inline void br_switchdev_frame_unmark(struct sk_buff *skb) +{ +} +#endif /* CONFIG_NET_SWITCHDEV */ + +/* br_arp_nd_proxy.c */ +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); +void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p); +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg); +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m); +#endif diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h new file mode 100644 index 000000000..6657705b9 --- /dev/null +++ b/net/bridge/br_private_mrp.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _BR_PRIVATE_MRP_H_ +#define _BR_PRIVATE_MRP_H_ + +#include "br_private.h" +#include <uapi/linux/mrp_bridge.h> + +struct br_mrp { + /* list of mrp instances */ + struct list_head list; + + struct net_bridge_port __rcu *p_port; + struct net_bridge_port __rcu *s_port; + struct net_bridge_port __rcu *i_port; + + u32 ring_id; + u16 in_id; + u16 prio; + + enum br_mrp_ring_role_type ring_role; + u8 ring_role_offloaded; + enum br_mrp_ring_state_type ring_state; + u32 ring_transitions; + + enum br_mrp_in_role_type in_role; + u8 in_role_offloaded; + enum br_mrp_in_state_type in_state; + u32 in_transitions; + + struct delayed_work test_work; + u32 test_interval; + unsigned long test_end; + u32 test_count_miss; + u32 test_max_miss; + bool test_monitor; + + struct delayed_work in_test_work; + u32 in_test_interval; + unsigned long in_test_end; + u32 in_test_count_miss; + u32 in_test_max_miss; + + u32 seq_id; + + struct rcu_head rcu; +}; + +/* br_mrp.c */ +int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance); +int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance); +int br_mrp_set_port_state(struct net_bridge_port *p, + enum br_mrp_port_state_type state); +int br_mrp_set_port_role(struct net_bridge_port *p, + enum br_mrp_port_role_type role); +int br_mrp_set_ring_state(struct net_bridge *br, + struct br_mrp_ring_state *state); +int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role); +int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test); +int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state); +int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role); +int br_mrp_start_in_test(struct net_bridge *br, + struct br_mrp_start_in_test *test); 
+ +/* br_mrp_switchdev.c */ +int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp); +int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp); +int br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_ring_role_type role); +int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_ring_state_type state); +int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period, + bool monitor); +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state); +int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, + enum br_mrp_port_role_type role); +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, + u16 in_id, u32 ring_id, + enum br_mrp_in_role_type role); +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_in_state_type state); +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period); + +/* br_mrp_netlink.c */ +int br_mrp_ring_port_open(struct net_device *dev, u8 loc); +int br_mrp_in_port_open(struct net_device *dev, u8 loc); + +#endif /* _BR_PRIVATE_MRP_H */ diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h new file mode 100644 index 000000000..814cf1364 --- /dev/null +++ b/net/bridge/br_private_stp.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#ifndef _BR_PRIVATE_STP_H +#define _BR_PRIVATE_STP_H + +#define BPDU_TYPE_CONFIG 0 +#define BPDU_TYPE_TCN 0x80 + +/* IEEE 802.1D-1998 timer values */ +#define BR_MIN_HELLO_TIME (1*HZ) +#define BR_MAX_HELLO_TIME (10*HZ) + +#define BR_MIN_FORWARD_DELAY (2*HZ) +#define BR_MAX_FORWARD_DELAY (30*HZ) + +#define BR_MIN_MAX_AGE (6*HZ) +#define BR_MAX_MAX_AGE (40*HZ) + +#define BR_MIN_PATH_COST 1 +#define BR_MAX_PATH_COST 65535 + +struct br_config_bpdu { + unsigned int topology_change:1; + unsigned int topology_change_ack:1; + bridge_id root; + int root_path_cost; + bridge_id bridge_id; + port_id port_id; + int message_age; + int max_age; + int hello_time; + int forward_delay; +}; + +/* called under bridge lock */ +static inline int br_is_designated_port(const struct net_bridge_port *p) +{ + return !memcmp(&p->designated_bridge, &p->br->bridge_id, 8) && + (p->designated_port == p->port_id); +} + + +/* br_stp.c */ +void br_become_root_bridge(struct net_bridge *br); +void br_config_bpdu_generation(struct net_bridge *); +void br_configuration_update(struct net_bridge *); +void br_port_state_selection(struct net_bridge *); +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu); +void br_received_tcn_bpdu(struct net_bridge_port *p); +void br_transmit_config(struct net_bridge_port *p); +void br_transmit_tcn(struct net_bridge *br); +void br_topology_change_detection(struct net_bridge *br); +void __br_set_topology_change(struct net_bridge *br, unsigned char val); + +/* br_stp_bpdu.c */ +void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); +void br_send_tcn_bpdu(struct net_bridge_port *); + +#endif diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h new file mode 100644 index 000000000..f6c65dc08 --- /dev/null +++ b/net/bridge/br_private_tunnel.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Bridge per vlan tunnels + * + * Authors: + 
* Roopa Prabhu <roopa@cumulusnetworks.com> + */ + +#ifndef _BR_PRIVATE_TUNNEL_H +#define _BR_PRIVATE_TUNNEL_H + +struct vtunnel_info { + u32 tunid; + u16 vid; + u16 flags; +}; + +/* br_netlink_tunnel.c */ +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo); +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, + int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last, + bool *changed); +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg); +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last); +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed); + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +/* br_vlan_tunnel.c */ +int vlan_tunnel_init(struct net_bridge_vlan_group *vg); +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg); +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid); +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 tun_id); +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan); +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg); +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan); +#else +static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, + u16 vid) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, + u16 vid, u32 tun_id) +{ + return 0; +} + +static inline void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ +} + +static inline void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ +} + +static inline int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + return 0; +} +#endif + +#endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c new file mode 100644 index 000000000..3e88be7aa --- /dev/null +++ b/net/bridge/br_stp.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Spanning tree protocol; generic parts + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ +#include <linux/kernel.h> +#include <linux/rculist.h> +#include <net/switchdev.h> + +#include "br_private.h" +#include "br_private_stp.h" + +/* since time values in bpdu are in jiffies and then scaled (1/256) + * before sending, make sure that is at least one STP tick. + */ +#define MESSAGE_AGE_INCR ((HZ / 256) + 1) + +static const char *const br_port_state_names[] = { + [BR_STATE_DISABLED] = "disabled", + [BR_STATE_LISTENING] = "listening", + [BR_STATE_LEARNING] = "learning", + [BR_STATE_FORWARDING] = "forwarding", + [BR_STATE_BLOCKING] = "blocking", +}; + +void br_set_state(struct net_bridge_port *p, unsigned int state) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .flags = SWITCHDEV_F_DEFER, + .u.stp_state = state, + }; + int err; + + /* Don't change the state of the ports if they are driven by a different + * protocol. 
+ */ + if (p->flags & BR_MRP_AWARE) + return; + + p->state = state; + err = switchdev_port_attr_set(p->dev, &attr); + if (err && err != -EOPNOTSUPP) + br_warn(p->br, "error setting offload STP state on port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + else + br_info(p->br, "port %u(%s) entered %s state\n", + (unsigned int) p->port_no, p->dev->name, + br_port_state_names[p->state]); + + if (p->br->stp_enabled == BR_KERNEL_STP) { + switch (p->state) { + case BR_STATE_BLOCKING: + p->stp_xstats.transition_blk++; + break; + case BR_STATE_FORWARDING: + p->stp_xstats.transition_fwd++; + break; + } + } +} + +/* called under bridge lock */ +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) +{ + struct net_bridge_port *p; + + list_for_each_entry_rcu(p, &br->port_list, list, + lockdep_is_held(&br->lock)) { + if (p->port_no == port_no) + return p; + } + + return NULL; +} + +/* called under bridge lock */ +static int br_should_become_root_port(const struct net_bridge_port *p, + u16 root_port) +{ + struct net_bridge *br; + struct net_bridge_port *rp; + int t; + + br = p->br; + if (p->state == BR_STATE_DISABLED || + br_is_designated_port(p)) + return 0; + + if (memcmp(&br->bridge_id, &p->designated_root, 8) <= 0) + return 0; + + if (!root_port) + return 1; + + rp = br_get_port(br, root_port); + + t = memcmp(&p->designated_root, &rp->designated_root, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->designated_cost + p->path_cost < + rp->designated_cost + rp->path_cost) + return 1; + else if (p->designated_cost + p->path_cost > + rp->designated_cost + rp->path_cost) + return 0; + + t = memcmp(&p->designated_bridge, &rp->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->designated_port < rp->designated_port) + return 1; + else if (p->designated_port > rp->designated_port) + return 0; + + if (p->port_id < rp->port_id) + return 1; + + return 0; +} + +static void br_root_port_block(const struct net_bridge *br, + struct net_bridge_port *p) +{ + + br_notice(br, "port %u(%s) tried to become root port (blocked)", + (unsigned int) p->port_no, p->dev->name); + + br_set_state(p, BR_STATE_LISTENING); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + if (br->forward_delay > 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + +/* called under bridge lock */ +static void br_root_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + u16 root_port = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (!br_should_become_root_port(p, root_port)) + continue; + + if (p->flags & BR_ROOT_BLOCK) + br_root_port_block(br, p); + else + root_port = p->port_no; + } + + br->root_port = root_port; + + if (!root_port) { + br->designated_root = br->bridge_id; + br->root_path_cost = 0; + } else { + p = br_get_port(br, root_port); + br->designated_root = p->designated_root; + br->root_path_cost = p->designated_cost + p->path_cost; + } +} + +/* called under bridge lock */ +void br_become_root_bridge(struct net_bridge *br) +{ + br->max_age = br->bridge_max_age; + br->hello_time = br->bridge_hello_time; + br->forward_delay = br->bridge_forward_delay; + br_topology_change_detection(br); + del_timer(&br->tcn_timer); + + if (br->dev->flags & IFF_UP) { + br_config_bpdu_generation(br); + mod_timer(&br->hello_timer, jiffies + br->hello_time); + } +} + +/* called under bridge lock */ +void br_transmit_config(struct net_bridge_port *p) +{ + struct br_config_bpdu bpdu; + struct net_bridge *br; + + if 
(timer_pending(&p->hold_timer)) { + p->config_pending = 1; + return; + } + + br = p->br; + + bpdu.topology_change = br->topology_change; + bpdu.topology_change_ack = p->topology_change_ack; + bpdu.root = br->designated_root; + bpdu.root_path_cost = br->root_path_cost; + bpdu.bridge_id = br->bridge_id; + bpdu.port_id = p->port_id; + if (br_is_root_bridge(br)) + bpdu.message_age = 0; + else { + struct net_bridge_port *root + = br_get_port(br, br->root_port); + bpdu.message_age = (jiffies - root->designated_age) + + MESSAGE_AGE_INCR; + } + bpdu.max_age = br->max_age; + bpdu.hello_time = br->hello_time; + bpdu.forward_delay = br->forward_delay; + + if (bpdu.message_age < br->max_age) { + br_send_config_bpdu(p, &bpdu); + p->topology_change_ack = 0; + p->config_pending = 0; + if (p->br->stp_enabled == BR_KERNEL_STP) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); + } +} + +/* called under bridge lock */ +static void br_record_config_information(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) +{ + p->designated_root = bpdu->root; + p->designated_cost = bpdu->root_path_cost; + p->designated_bridge = bpdu->bridge_id; + p->designated_port = bpdu->port_id; + p->designated_age = jiffies - bpdu->message_age; + + mod_timer(&p->message_age_timer, jiffies + + (bpdu->max_age - bpdu->message_age)); +} + +/* called under bridge lock */ +static void br_record_config_timeout_values(struct net_bridge *br, + const struct br_config_bpdu *bpdu) +{ + br->max_age = bpdu->max_age; + br->hello_time = bpdu->hello_time; + br->forward_delay = bpdu->forward_delay; + __br_set_topology_change(br, bpdu->topology_change); +} + +/* called under bridge lock */ +void br_transmit_tcn(struct net_bridge *br) +{ + struct net_bridge_port *p; + + p = br_get_port(br, br->root_port); + if (p) + br_send_tcn_bpdu(p); + else + br_notice(br, "root port %u not found for topology notice\n", + br->root_port); +} + +/* called under bridge lock */ +static int br_should_become_designated_port(const struct net_bridge_port *p) +{ + struct net_bridge *br; + int t; + + br = p->br; + if (br_is_designated_port(p)) + return 1; + + if (memcmp(&p->designated_root, &br->designated_root, 8)) + return 1; + + if (br->root_path_cost < p->designated_cost) + return 1; + else if (br->root_path_cost > p->designated_cost) + return 0; + + t = memcmp(&br->bridge_id, &p->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->port_id < p->designated_port) + return 1; + + return 0; +} + +/* called under bridge lock */ +static void br_designated_port_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_should_become_designated_port(p)) + br_become_designated_port(p); + + } +} + +/* called under bridge lock */ +static int br_supersedes_port_info(const struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) +{ + int t; + + t = memcmp(&bpdu->root, &p->designated_root, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (bpdu->root_path_cost < p->designated_cost) + return 1; + else if (bpdu->root_path_cost > p->designated_cost) + return 0; + + t = memcmp(&bpdu->bridge_id, &p->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8)) + return 1; + + if (bpdu->port_id <= p->designated_port) + return 1; + + return 0; +} + +/* called under bridge lock */ +static void br_topology_change_acknowledged(struct 
net_bridge *br) +{ + br->topology_change_detected = 0; + del_timer(&br->tcn_timer); +} + +/* called under bridge lock */ +void br_topology_change_detection(struct net_bridge *br) +{ + int isroot = br_is_root_bridge(br); + + if (br->stp_enabled != BR_KERNEL_STP) + return; + + br_info(br, "topology change detected, %s\n", + isroot ? "propagating" : "sending tcn bpdu"); + + if (isroot) { + __br_set_topology_change(br, 1); + mod_timer(&br->topology_change_timer, jiffies + + br->bridge_forward_delay + br->bridge_max_age); + } else if (!br->topology_change_detected) { + br_transmit_tcn(br); + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); + } + + br->topology_change_detected = 1; +} + +/* called under bridge lock */ +void br_config_bpdu_generation(struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_is_designated_port(p)) + br_transmit_config(p); + } +} + +/* called under bridge lock */ +static void br_reply(struct net_bridge_port *p) +{ + br_transmit_config(p); +} + +/* called under bridge lock */ +void br_configuration_update(struct net_bridge *br) +{ + br_root_selection(br); + br_designated_port_selection(br); +} + +/* called under bridge lock */ +void br_become_designated_port(struct net_bridge_port *p) +{ + struct net_bridge *br; + + br = p->br; + p->designated_root = br->designated_root; + p->designated_cost = br->root_path_cost; + p->designated_bridge = br->bridge_id; + p->designated_port = p->port_id; +} + + +/* called under bridge lock */ +static void br_make_blocking(struct net_bridge_port *p) +{ + if (p->state != BR_STATE_DISABLED && + p->state != BR_STATE_BLOCKING) { + if (p->state == BR_STATE_FORWARDING || + p->state == BR_STATE_LEARNING) + br_topology_change_detection(p->br); + + br_set_state(p, BR_STATE_BLOCKING); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + del_timer(&p->forward_delay_timer); + } +} + +/* called under bridge lock */ +static void br_make_forwarding(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + + if (p->state != BR_STATE_BLOCKING) + return; + + if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { + br_set_state(p, BR_STATE_FORWARDING); + br_topology_change_detection(br); + del_timer(&p->forward_delay_timer); + } else if (br->stp_enabled == BR_KERNEL_STP) + br_set_state(p, BR_STATE_LISTENING); + else + br_set_state(p, BR_STATE_LEARNING); + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + if (br->forward_delay != 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + +/* called under bridge lock */ +void br_port_state_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + unsigned int liveports = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state == BR_STATE_DISABLED) + continue; + + /* Don't change port states if userspace is handling STP */ + if (br->stp_enabled != BR_USER_STP) { + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); + } + } + + if (p->state != BR_STATE_BLOCKING) + br_multicast_enable_port(p); + /* Multicast is not disabled for the port when it goes in + * blocking state because the timers will expire and stop by + * themselves without sending more queries. 
+ */ + if (p->state == BR_STATE_FORWARDING) + ++liveports; + } + + if (liveports == 0) + netif_carrier_off(br->dev); + else + netif_carrier_on(br->dev); +} + +/* called under bridge lock */ +static void br_topology_change_acknowledge(struct net_bridge_port *p) +{ + p->topology_change_ack = 1; + br_transmit_config(p); +} + +/* called under bridge lock */ +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) +{ + struct net_bridge *br; + int was_root; + + p->stp_xstats.rx_bpdu++; + + br = p->br; + was_root = br_is_root_bridge(br); + + if (br_supersedes_port_info(p, bpdu)) { + br_record_config_information(p, bpdu); + br_configuration_update(br); + br_port_state_selection(br); + + if (!br_is_root_bridge(br) && was_root) { + del_timer(&br->hello_timer); + if (br->topology_change_detected) { + del_timer(&br->topology_change_timer); + br_transmit_tcn(br); + + mod_timer(&br->tcn_timer, + jiffies + br->bridge_hello_time); + } + } + + if (p->port_no == br->root_port) { + br_record_config_timeout_values(br, bpdu); + br_config_bpdu_generation(br); + if (bpdu->topology_change_ack) + br_topology_change_acknowledged(br); + } + } else if (br_is_designated_port(p)) { + br_reply(p); + } +} + +/* called under bridge lock */ +void br_received_tcn_bpdu(struct net_bridge_port *p) +{ + p->stp_xstats.rx_tcn++; + + if (br_is_designated_port(p)) { + br_info(p->br, "port %u(%s) received tcn bpdu\n", + (unsigned int) p->port_no, p->dev->name); + + br_topology_change_detection(p->br); + br_topology_change_acknowledge(p); + } +} + +/* Change bridge STP parameter */ +int br_set_hello_time(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_hello_time = t; + if (br_is_root_bridge(br)) + br->hello_time = br->bridge_hello_time; + spin_unlock_bh(&br->lock); + return 0; +} + +int br_set_max_age(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_max_age = t; + if (br_is_root_bridge(br)) + br->max_age = br->bridge_max_age; + spin_unlock_bh(&br->lock); + return 0; + +} + +/* called under bridge lock */ +int __set_ageing_time(struct net_device *dev, unsigned long t) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, + .u.ageing_time = jiffies_to_clock_t(t), + }; + int err; + + err = switchdev_port_attr_set(dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +/* Set time interval that dynamic forwarding entries live + * For pure software bridge, allow values outside the 802.1 + * standard specification for special cases: + * 0 - entry never ages (all permanant) + * 1 - entry disappears (no persistance) + * + * Offloaded switch entries maybe more restrictive + */ +int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) +{ + unsigned long t = clock_t_to_jiffies(ageing_time); + int err; + + err = __set_ageing_time(br->dev, t); + if (err) + return err; + + spin_lock_bh(&br->lock); + br->bridge_ageing_time = t; + br->ageing_time = t; + spin_unlock_bh(&br->lock); + + mod_delayed_work(system_long_wq, &br->gc_work, 0); + + return 0; +} + +/* called under bridge lock */ +void __br_set_topology_change(struct net_bridge *br, unsigned char val) +{ + unsigned 
long t; + int err; + + if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) { + /* On topology change, set the bridge ageing time to twice the + * forward delay. Otherwise, restore its default ageing time. + */ + + if (val) { + t = 2 * br->forward_delay; + br_debug(br, "decreasing ageing time to %lu\n", t); + } else { + t = br->bridge_ageing_time; + br_debug(br, "restoring ageing time to %lu\n", t); + } + + err = __set_ageing_time(br->dev, t); + if (err) + br_warn(br, "error offloading ageing time\n"); + else + br->ageing_time = t; + } + + br->topology_change = val; +} + +void __br_set_forward_delay(struct net_bridge *br, unsigned long t) +{ + br->bridge_forward_delay = t; + if (br_is_root_bridge(br)) + br->forward_delay = br->bridge_forward_delay; +} + +int br_set_forward_delay(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + int err = -ERANGE; + + spin_lock_bh(&br->lock); + if (br->stp_enabled != BR_NO_STP && + (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) + goto unlock; + + __br_set_forward_delay(br, t); + err = 0; + +unlock: + spin_unlock_bh(&br->lock); + return err; +} diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c new file mode 100644 index 000000000..0e4572f31 --- /dev/null +++ b/net/bridge/br_stp_bpdu.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Spanning tree protocol; BPDU handling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/netfilter_bridge.h> +#include <linux/etherdevice.h> +#include <linux/llc.h> +#include <linux/slab.h> +#include <linux/pkt_sched.h> +#include <net/net_namespace.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/stp.h> +#include <asm/unaligned.h> + +#include "br_private.h" +#include "br_private_stp.h" + +#define STP_HZ 256 + +#define LLC_RESERVE sizeof(struct llc_pdu_un) + +static int br_send_bpdu_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + +static void br_send_bpdu(struct net_bridge_port *p, + const unsigned char *data, int length) +{ + struct sk_buff *skb; + + skb = dev_alloc_skb(length+LLC_RESERVE); + if (!skb) + return; + + skb->dev = p->dev; + skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; + + skb_reserve(skb, LLC_RESERVE); + __skb_put_data(skb, data, length); + + llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, + LLC_SAP_BSPAN, LLC_PDU_CMD); + llc_pdu_init_as_ui_cmd(skb); + + llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); + + skb_reset_mac_header(skb); + + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(p->dev), NULL, skb, NULL, skb->dev, + br_send_bpdu_finish); +} + +static inline void br_set_ticks(unsigned char *dest, int j) +{ + unsigned long ticks = (STP_HZ * j)/ HZ; + + put_unaligned_be16(ticks, dest); +} + +static inline int br_get_ticks(const unsigned char *src) +{ + unsigned long ticks = get_unaligned_be16(src); + + return DIV_ROUND_UP(ticks * HZ, STP_HZ); +} + +/* called under bridge lock */ +void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +{ + unsigned char buf[35]; + + if (p->br->stp_enabled != BR_KERNEL_STP) + return; + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = BPDU_TYPE_CONFIG; + buf[4] = (bpdu->topology_change ? 0x01 : 0) | + (bpdu->topology_change_ack ? 
0x80 : 0); + buf[5] = bpdu->root.prio[0]; + buf[6] = bpdu->root.prio[1]; + buf[7] = bpdu->root.addr[0]; + buf[8] = bpdu->root.addr[1]; + buf[9] = bpdu->root.addr[2]; + buf[10] = bpdu->root.addr[3]; + buf[11] = bpdu->root.addr[4]; + buf[12] = bpdu->root.addr[5]; + buf[13] = (bpdu->root_path_cost >> 24) & 0xFF; + buf[14] = (bpdu->root_path_cost >> 16) & 0xFF; + buf[15] = (bpdu->root_path_cost >> 8) & 0xFF; + buf[16] = bpdu->root_path_cost & 0xFF; + buf[17] = bpdu->bridge_id.prio[0]; + buf[18] = bpdu->bridge_id.prio[1]; + buf[19] = bpdu->bridge_id.addr[0]; + buf[20] = bpdu->bridge_id.addr[1]; + buf[21] = bpdu->bridge_id.addr[2]; + buf[22] = bpdu->bridge_id.addr[3]; + buf[23] = bpdu->bridge_id.addr[4]; + buf[24] = bpdu->bridge_id.addr[5]; + buf[25] = (bpdu->port_id >> 8) & 0xFF; + buf[26] = bpdu->port_id & 0xFF; + + br_set_ticks(buf+27, bpdu->message_age); + br_set_ticks(buf+29, bpdu->max_age); + br_set_ticks(buf+31, bpdu->hello_time); + br_set_ticks(buf+33, bpdu->forward_delay); + + br_send_bpdu(p, buf, 35); + + p->stp_xstats.tx_bpdu++; +} + +/* called under bridge lock */ +void br_send_tcn_bpdu(struct net_bridge_port *p) +{ + unsigned char buf[4]; + + if (p->br->stp_enabled != BR_KERNEL_STP) + return; + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = BPDU_TYPE_TCN; + br_send_bpdu(p, buf, 4); + + p->stp_xstats.tx_tcn++; +} + +/* + * Called from llc. + * + * NO locks, but rcu_read_lock + */ +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) +{ + struct net_bridge_port *p; + struct net_bridge *br; + const unsigned char *buf; + + if (!pskb_may_pull(skb, 4)) + goto err; + + /* compare of protocol id and version */ + buf = skb->data; + if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) + goto err; + + p = br_port_get_check_rcu(dev); + if (!p) + goto err; + + br = p->br; + spin_lock(&br->lock); + + if (br->stp_enabled != BR_KERNEL_STP) + goto out; + + if (!(br->dev->flags & IFF_UP)) + goto out; + + if (p->state == BR_STATE_DISABLED) + goto out; + + if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr)) + goto out; + + if (p->flags & BR_BPDU_GUARD) { + br_notice(br, "BPDU received on blocked port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + br_stp_disable_port(p); + goto out; + } + + buf = skb_pull(skb, 3); + + if (buf[0] == BPDU_TYPE_CONFIG) { + struct br_config_bpdu bpdu; + + if (!pskb_may_pull(skb, 32)) + goto out; + + buf = skb->data; + bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0; + bpdu.topology_change_ack = (buf[1] & 0x80) ? 
1 : 0; + + bpdu.root.prio[0] = buf[2]; + bpdu.root.prio[1] = buf[3]; + bpdu.root.addr[0] = buf[4]; + bpdu.root.addr[1] = buf[5]; + bpdu.root.addr[2] = buf[6]; + bpdu.root.addr[3] = buf[7]; + bpdu.root.addr[4] = buf[8]; + bpdu.root.addr[5] = buf[9]; + bpdu.root_path_cost = + (buf[10] << 24) | + (buf[11] << 16) | + (buf[12] << 8) | + buf[13]; + bpdu.bridge_id.prio[0] = buf[14]; + bpdu.bridge_id.prio[1] = buf[15]; + bpdu.bridge_id.addr[0] = buf[16]; + bpdu.bridge_id.addr[1] = buf[17]; + bpdu.bridge_id.addr[2] = buf[18]; + bpdu.bridge_id.addr[3] = buf[19]; + bpdu.bridge_id.addr[4] = buf[20]; + bpdu.bridge_id.addr[5] = buf[21]; + bpdu.port_id = (buf[22] << 8) | buf[23]; + + bpdu.message_age = br_get_ticks(buf+24); + bpdu.max_age = br_get_ticks(buf+26); + bpdu.hello_time = br_get_ticks(buf+28); + bpdu.forward_delay = br_get_ticks(buf+30); + + if (bpdu.message_age > bpdu.max_age) { + if (net_ratelimit()) + br_notice(p->br, + "port %u config from %pM" + " (message_age %ul > max_age %ul)\n", + p->port_no, + eth_hdr(skb)->h_source, + bpdu.message_age, bpdu.max_age); + goto out; + } + + br_received_config_bpdu(p, &bpdu); + } else if (buf[0] == BPDU_TYPE_TCN) { + br_received_tcn_bpdu(p); + } + out: + spin_unlock(&br->lock); + err: + kfree_skb(skb); +} diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c new file mode 100644 index 000000000..3326dfced --- /dev/null +++ b/net/bridge/br_stp_if.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Spanning tree protocol; interface code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <net/switchdev.h> + +#include "br_private.h" +#include "br_private_stp.h" + + +/* Port id is composed of priority and port number. + * NB: some bits of priority are dropped to + * make room for more ports. 
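+ * For example, with BR_PORT_BITS of 10 the upper six bits of the id carry
+ * the priority and the low ten bits the port number, so the default port
+ * priority of 0x20 together with port number 1 gives the port_id 0x8001
+ * familiar from sysfs.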
+ */ +static inline port_id br_make_port_id(__u8 priority, __u16 port_no) +{ + return ((u16)priority << BR_PORT_BITS) + | (port_no & ((1<<BR_PORT_BITS)-1)); +} + +#define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS) + +/* called under bridge lock */ +void br_init_port(struct net_bridge_port *p) +{ + int err; + + p->port_id = br_make_port_id(p->priority, p->port_no); + br_become_designated_port(p); + br_set_state(p, BR_STATE_BLOCKING); + p->topology_change_ack = 0; + p->config_pending = 0; + + err = __set_ageing_time(p->dev, p->br->ageing_time); + if (err) + netdev_err(p->dev, "failed to offload ageing time\n"); +} + +/* NO locks held */ +void br_stp_enable_bridge(struct net_bridge *br) +{ + struct net_bridge_port *p; + + spin_lock_bh(&br->lock); + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); + mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); + + br_config_bpdu_generation(br); + + list_for_each_entry(p, &br->port_list, list) { + if (netif_running(p->dev) && netif_oper_up(p->dev)) + br_stp_enable_port(p); + + } + spin_unlock_bh(&br->lock); +} + +/* NO locks held */ +void br_stp_disable_bridge(struct net_bridge *br) +{ + struct net_bridge_port *p; + + spin_lock_bh(&br->lock); + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED) + br_stp_disable_port(p); + + } + + __br_set_topology_change(br, 0); + br->topology_change_detected = 0; + spin_unlock_bh(&br->lock); + + del_timer_sync(&br->hello_timer); + del_timer_sync(&br->topology_change_timer); + del_timer_sync(&br->tcn_timer); + cancel_delayed_work_sync(&br->gc_work); +} + +/* called under bridge lock */ +void br_stp_enable_port(struct net_bridge_port *p) +{ + br_init_port(p); + br_port_state_selection(p->br); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); +} + +/* called under bridge lock */ +void br_stp_disable_port(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + int wasroot; + + wasroot = br_is_root_bridge(br); + br_become_designated_port(p); + br_set_state(p, BR_STATE_DISABLED); + p->topology_change_ack = 0; + p->config_pending = 0; + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + + del_timer(&p->message_age_timer); + del_timer(&p->forward_delay_timer); + del_timer(&p->hold_timer); + + if (!rcu_access_pointer(p->backup_port)) + br_fdb_delete_by_port(br, p, 0, 0); + br_multicast_disable_port(p); + + br_configuration_update(br); + + br_port_state_selection(br); + + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); +} + +static int br_stp_call_user(struct net_bridge *br, char *arg) +{ + char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL }; + char *envp[] = { NULL }; + int rc; + + /* call userspace STP and report program errors */ + rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (rc > 0) { + if (rc & 0xff) + br_debug(br, BR_STP_PROG " received signal %d\n", + rc & 0x7f); + else + br_debug(br, BR_STP_PROG " exited with code %d\n", + (rc >> 8) & 0xff); + } + + return rc; +} + +static void br_stp_start(struct net_bridge *br) +{ + int err = -ENOENT; + + if (net_eq(dev_net(br->dev), &init_net)) + err = br_stp_call_user(br, "start"); + + if (err && err != -ENOENT) + br_err(br, "failed to start userspace STP (%d)\n", err); + + spin_lock_bh(&br->lock); + + if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); + + if (!err) { + br->stp_enabled = 
BR_USER_STP; + br_debug(br, "userspace STP started\n"); + } else { + br->stp_enabled = BR_KERNEL_STP; + br_debug(br, "using kernel STP\n"); + + /* To start timers on any ports left in blocking */ + if (br->dev->flags & IFF_UP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); + br_port_state_selection(br); + } + + spin_unlock_bh(&br->lock); +} + +static void br_stp_stop(struct net_bridge *br) +{ + int err; + + if (br->stp_enabled == BR_USER_STP) { + err = br_stp_call_user(br, "stop"); + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); + + /* To start timers on any ports left in blocking */ + spin_lock_bh(&br->lock); + br_port_state_selection(br); + spin_unlock_bh(&br->lock); + } + + br->stp_enabled = BR_NO_STP; +} + +int br_stp_set_enabled(struct net_bridge *br, unsigned long val, + struct netlink_ext_ack *extack) +{ + ASSERT_RTNL(); + + if (!net_eq(dev_net(br->dev), &init_net)) + NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns"); + + if (br_mrp_enabled(br)) { + NL_SET_ERR_MSG_MOD(extack, + "STP can't be enabled if MRP is already enabled"); + return -EINVAL; + } + + if (val) { + if (br->stp_enabled == BR_NO_STP) + br_stp_start(br); + } else { + if (br->stp_enabled != BR_NO_STP) + br_stp_stop(br); + } + + return 0; +} + +/* called under bridge lock */ +void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) +{ + /* should be aligned on 2 bytes for ether_addr_equal() */ + unsigned short oldaddr_aligned[ETH_ALEN >> 1]; + unsigned char *oldaddr = (unsigned char *)oldaddr_aligned; + struct net_bridge_port *p; + int wasroot; + + wasroot = br_is_root_bridge(br); + + br_fdb_change_mac_address(br, addr); + + memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); + memcpy(br->bridge_id.addr, addr, ETH_ALEN); + memcpy(br->dev->dev_addr, addr, ETH_ALEN); + + list_for_each_entry(p, &br->port_list, list) { + if (ether_addr_equal(p->designated_bridge.addr, oldaddr)) + memcpy(p->designated_bridge.addr, addr, ETH_ALEN); + + if (ether_addr_equal(p->designated_root.addr, oldaddr)) + memcpy(p->designated_root.addr, addr, ETH_ALEN); + } + + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); +} + +/* should be aligned on 2 bytes for ether_addr_equal() */ +static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; + +/* called under bridge lock */ +bool br_stp_recalculate_bridge_id(struct net_bridge *br) +{ + const unsigned char *br_mac_zero = + (const unsigned char *)br_mac_zero_aligned; + const unsigned char *addr = br_mac_zero; + struct net_bridge_port *p; + + /* user has chosen a value so keep it */ + if (br->dev->addr_assign_type == NET_ADDR_SET) + return false; + + list_for_each_entry(p, &br->port_list, list) { + if (addr == br_mac_zero || + memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) + addr = p->dev->dev_addr; + + } + + if (ether_addr_equal(br->bridge_id.addr, addr)) + return false; /* no change */ + + br_stp_change_bridge_id(br, addr); + return true; +} + +/* Acquires and releases bridge lock */ +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) +{ + struct net_bridge_port *p; + int wasroot; + + spin_lock_bh(&br->lock); + wasroot = br_is_root_bridge(br); + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_is_designated_port(p)) { + p->designated_bridge.prio[0] = (newprio >> 8) & 0xFF; + p->designated_bridge.prio[1] = newprio & 0xFF; + } + + } + + br->bridge_id.prio[0] = (newprio >> 8) & 0xFF; + 
br->bridge_id.prio[1] = newprio & 0xFF; + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); + spin_unlock_bh(&br->lock); +} + +/* called under bridge lock */ +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio) +{ + port_id new_port_id; + + if (newprio > BR_MAX_PORT_PRIORITY) + return -ERANGE; + + new_port_id = br_make_port_id(newprio, p->port_no); + if (br_is_designated_port(p)) + p->designated_port = new_port_id; + + p->port_id = new_port_id; + p->priority = newprio; + if (!memcmp(&p->br->bridge_id, &p->designated_bridge, 8) && + p->port_id < p->designated_port) { + br_become_designated_port(p); + br_port_state_selection(p->br); + } + + return 0; +} + +/* called under bridge lock */ +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) +{ + if (path_cost < BR_MIN_PATH_COST || + path_cost > BR_MAX_PATH_COST) + return -ERANGE; + + p->flags |= BR_ADMIN_COST; + p->path_cost = path_cost; + br_configuration_update(p->br); + br_port_state_selection(p->br); + return 0; +} + +ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) +{ + return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", + id->prio[0], id->prio[1], + id->addr[0], id->addr[1], id->addr[2], + id->addr[3], id->addr[4], id->addr[5]); +} diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c new file mode 100644 index 000000000..27bf1979b --- /dev/null +++ b/net/bridge/br_stp_timer.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Spanning tree protocol; timer-related code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + */ + +#include <linux/kernel.h> +#include <linux/times.h> + +#include "br_private.h" +#include "br_private_stp.h" + +/* called under bridge lock */ +static int br_is_designated_for_some_port(const struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + !memcmp(&p->designated_bridge, &br->bridge_id, 8)) + return 1; + } + + return 0; +} + +static void br_hello_timer_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, hello_timer); + + br_debug(br, "hello timer expired\n"); + spin_lock(&br->lock); + if (br->dev->flags & IFF_UP) { + br_config_bpdu_generation(br); + + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); + } + spin_unlock(&br->lock); +} + +static void br_message_age_timer_expired(struct timer_list *t) +{ + struct net_bridge_port *p = from_timer(p, t, message_age_timer); + struct net_bridge *br = p->br; + const bridge_id *id = &p->designated_bridge; + int was_root; + + if (p->state == BR_STATE_DISABLED) + return; + + br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n", + (unsigned int) p->port_no, p->dev->name, + id->prio[0], id->prio[1], &id->addr); + + /* + * According to the spec, the message age timer cannot be + * running when we are the root bridge. So.. this was_root + * check is redundant. I'm leaving it in for now, though. 
+ */ + spin_lock(&br->lock); + if (p->state == BR_STATE_DISABLED) + goto unlock; + was_root = br_is_root_bridge(br); + + br_become_designated_port(p); + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !was_root) + br_become_root_bridge(br); + unlock: + spin_unlock(&br->lock); +} + +static void br_forward_delay_timer_expired(struct timer_list *t) +{ + struct net_bridge_port *p = from_timer(p, t, forward_delay_timer); + struct net_bridge *br = p->br; + + br_debug(br, "port %u(%s) forward delay timer\n", + (unsigned int) p->port_no, p->dev->name); + spin_lock(&br->lock); + if (p->state == BR_STATE_LISTENING) { + br_set_state(p, BR_STATE_LEARNING); + mod_timer(&p->forward_delay_timer, + jiffies + br->forward_delay); + } else if (p->state == BR_STATE_LEARNING) { + br_set_state(p, BR_STATE_FORWARDING); + if (br_is_designated_for_some_port(br)) + br_topology_change_detection(br); + netif_carrier_on(br->dev); + } + rcu_read_lock(); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + rcu_read_unlock(); + spin_unlock(&br->lock); +} + +static void br_tcn_timer_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, tcn_timer); + + br_debug(br, "tcn timer expired\n"); + spin_lock(&br->lock); + if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { + br_transmit_tcn(br); + + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); + } + spin_unlock(&br->lock); +} + +static void br_topology_change_timer_expired(struct timer_list *t) +{ + struct net_bridge *br = from_timer(br, t, topology_change_timer); + + br_debug(br, "topo change timer expired\n"); + spin_lock(&br->lock); + br->topology_change_detected = 0; + __br_set_topology_change(br, 0); + spin_unlock(&br->lock); +} + +static void br_hold_timer_expired(struct timer_list *t) +{ + struct net_bridge_port *p = from_timer(p, t, hold_timer); + + br_debug(p->br, "port %u(%s) hold timer expired\n", + (unsigned int) p->port_no, p->dev->name); + + spin_lock(&p->br->lock); + if (p->config_pending) + br_transmit_config(p); + spin_unlock(&p->br->lock); +} + +void br_stp_timer_init(struct net_bridge *br) +{ + timer_setup(&br->hello_timer, br_hello_timer_expired, 0); + timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0); + timer_setup(&br->topology_change_timer, + br_topology_change_timer_expired, 0); +} + +void br_stp_port_timer_init(struct net_bridge_port *p) +{ + timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0); + timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0); + timer_setup(&p->hold_timer, br_hold_timer_expired, 0); +} + +/* Report ticks left (in USER_HZ) used for API */ +unsigned long br_timer_value(const struct timer_list *timer) +{ + return timer_pending(timer) + ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; +} diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000..3c42095fa --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <net/switchdev.h> + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. 
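+ * Walk the ports already on the bridge: if one of them shares dev's
+ * switchdev parent ID, reuse its forwarding mark; otherwise hand out
+ * a fresh mark below.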
*/ + list_for_each_entry(p, &br->port_list, list) { + if (netdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct netdev_phys_item_id ppid = { }; + int err; + + ASSERT_RTNL(); + + err = dev_get_port_parent_id(p->dev, &ppid, true); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} + +/* Flags that can be offloaded to hardware */ +#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ + BR_MCAST_FLOOD | BR_BCAST_FLOOD) + +int br_switchdev_set_port_flag(struct net_bridge_port *p, + unsigned long flags, + unsigned long mask) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, + .u.brport_flags = mask, + }; + struct switchdev_notifier_port_attr_info info = { + .attr = &attr, + }; + int err; + + if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD) + return 0; + + /* We run from atomic context here */ + err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, + &info.info, NULL); + err = notifier_to_errno(err); + if (err == -EOPNOTSUPP) + return 0; + + if (err) { + br_warn(p->br, "bridge flag offload is not supported %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return -EOPNOTSUPP; + } + + attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS; + attr.flags = SWITCHDEV_F_DEFER; + attr.u.brport_flags = flags; + + err = switchdev_port_attr_set(p->dev, &attr); + if (err) { + br_warn(p->br, "error setting offload flag on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + return err; + } + + return 0; +} + +static void +br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, + u16 vid, struct net_device *dev, + bool added_by_user, bool offloaded) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + info.addr = mac; + info.vid = vid; + info.added_by_user = added_by_user; + info.offloaded = offloaded; + notifier_type = adding ? 
SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; + call_switchdev_notifiers(notifier_type, dev, &info.info, NULL); +} + +void +br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +{ + if (!fdb->dst) + return; + if (test_bit(BR_FDB_LOCAL, &fdb->flags)) + return; + + switch (type) { + case RTM_DELNEIGH: + br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, + fdb->key.vlan_id, + fdb->dst->dev, + test_bit(BR_FDB_ADDED_BY_USER, + &fdb->flags), + test_bit(BR_FDB_OFFLOADED, + &fdb->flags)); + break; + case RTM_NEWNEIGH: + br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, + fdb->key.vlan_id, + fdb->dst->dev, + test_bit(BR_FDB_ADDED_BY_USER, + &fdb->flags), + test_bit(BR_FDB_OFFLOADED, + &fdb->flags)); + break; + } +} + +int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, + struct netlink_ext_ack *extack) +{ + struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = flags, + .vid_begin = vid, + .vid_end = vid, + }; + + return switchdev_port_obj_add(dev, &v.obj, extack); +} + +int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) +{ + struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid_begin = vid, + .vid_end = vid, + }; + + return switchdev_port_obj_del(dev, &v.obj); +} diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c new file mode 100644 index 000000000..7db06e3f6 --- /dev/null +++ b/net/bridge/br_sysfs_br.c @@ -0,0 +1,997 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Sysfs attributes of bridge + * Linux ethernet bridge + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> +#include <linux/times.h> +#include <linux/sched/signal.h> + +#include "br_private.h" + +#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) + +/* + * Common code for storing bridge parameters. + */ +static ssize_t store_bridge_parm(struct device *d, + const char *buf, size_t len, + int (*set)(struct net_bridge *, unsigned long)) +{ + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + int err; + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = (*set)(br, val); + if (!err) + netdev_state_change(br->dev); + rtnl_unlock(); + + return err ? 
err : len; +} + + +static ssize_t forward_delay_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); +} + +static ssize_t forward_delay_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_set_forward_delay); +} +static DEVICE_ATTR_RW(forward_delay); + +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(d)->hello_time)); +} + +static ssize_t hello_time_store(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, br_set_hello_time); +} +static DEVICE_ATTR_RW(hello_time); + +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(d)->max_age)); +} + +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_set_max_age); +} +static DEVICE_ATTR_RW(max_age); + +static ssize_t ageing_time_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); +} + +static int set_ageing_time(struct net_bridge *br, unsigned long val) +{ + return br_set_ageing_time(br, val); +} + +static ssize_t ageing_time_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_ageing_time); +} +static DEVICE_ATTR_RW(ageing_time); + +static ssize_t stp_state_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->stp_enabled); +} + + +static int set_stp_state(struct net_bridge *br, unsigned long val) +{ + return br_stp_set_enabled(br, val, NULL); +} + +static ssize_t stp_state_store(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_stp_state); +} +static DEVICE_ATTR_RW(stp_state); + +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#x\n", br->group_fwd_mask); +} + +static int set_group_fwd_mask(struct net_bridge *br, unsigned long val) +{ + if (val & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + + br->group_fwd_mask = val; + + return 0; +} + +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_group_fwd_mask); +} +static DEVICE_ATTR_RW(group_fwd_mask); + +static ssize_t priority_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", + (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); +} + +static int set_priority(struct net_bridge *br, unsigned long val) +{ + br_stp_set_bridge_priority(br, (u16) val); + return 0; +} + +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_priority); +} +static DEVICE_ATTR_RW(priority); + +static ssize_t root_id_show(struct device *d, struct 
device_attribute *attr, + char *buf) +{ + return br_show_bridge_id(buf, &to_bridge(d)->designated_root); +} +static DEVICE_ATTR_RO(root_id); + +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); +} +static DEVICE_ATTR_RO(bridge_id); + +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(d)->root_port); +} +static DEVICE_ATTR_RO(root_port); + +static ssize_t root_path_cost_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); +} +static DEVICE_ATTR_RO(root_path_cost); + +static ssize_t topology_change_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(d)->topology_change); +} +static DEVICE_ATTR_RO(topology_change); + +static ssize_t topology_change_detected_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->topology_change_detected); +} +static DEVICE_ATTR_RO(topology_change_detected); + +static ssize_t hello_timer_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); +} +static DEVICE_ATTR_RO(hello_timer); + +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); +} +static DEVICE_ATTR_RO(tcn_timer); + +static ssize_t topology_change_timer_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); +} +static DEVICE_ATTR_RO(topology_change_timer); + +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); +} +static DEVICE_ATTR_RO(gc_timer); + +static ssize_t group_addr_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%pM\n", br->group_addr); +} + +static ssize_t group_addr_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_bridge *br = to_bridge(d); + u8 new_addr[6]; + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!mac_pton(buf, new_addr)) + return -EINVAL; + + if (!is_link_local_ether_addr(new_addr)) + return -EINVAL; + + if (new_addr[5] == 1 || /* 802.3x Pause address */ + new_addr[5] == 2 || /* 802.3ad Slow protocols */ + new_addr[5] == 3) /* 802.1X PAE address */ + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + spin_lock_bh(&br->lock); + ether_addr_copy(br->group_addr, new_addr); + spin_unlock_bh(&br->lock); + + br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true); + br_recalculate_fwd_mask(br); + netdev_state_change(br->dev); + + rtnl_unlock(); + + return len; +} + +static DEVICE_ATTR_RW(group_addr); + +static int set_flush(struct net_bridge *br, unsigned long val) +{ + br_fdb_flush(br); + return 0; +} + +static ssize_t flush_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, 
len, set_flush); +} +static DEVICE_ATTR_WO(flush); + +static ssize_t no_linklocal_learn_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN)); +} + +static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val) +{ + return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, NULL); +} + +static ssize_t no_linklocal_learn_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_no_linklocal_learn); +} +static DEVICE_ATTR_RW(no_linklocal_learn); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t multicast_router_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_router); +} + +static ssize_t multicast_router_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_router); +} +static DEVICE_ATTR_RW(multicast_router); + +static ssize_t multicast_snooping_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED)); +} + +static ssize_t multicast_snooping_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_toggle); +} +static DEVICE_ATTR_RW(multicast_snooping); + +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", + br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val); + return 0; +} + +static ssize_t +multicast_query_use_ifaddr_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); + +static ssize_t multicast_querier_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER)); +} + +static ssize_t multicast_querier_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_querier); +} +static DEVICE_ATTR_RW(multicast_querier); + +static ssize_t hash_elasticity_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", RHT_ELASTICITY); +} + +static int set_elasticity(struct net_bridge *br, unsigned long val) +{ + br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n", + RHT_ELASTICITY); + return 0; +} + +static ssize_t hash_elasticity_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_elasticity); +} +static DEVICE_ATTR_RW(hash_elasticity); + +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_max); +} + +static int set_hash_max(struct net_bridge *br, unsigned long val) +{ + 
br->hash_max = val; + return 0; +} + +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_hash_max); +} +static DEVICE_ATTR_RW(hash_max); + +static ssize_t multicast_igmp_version_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_igmp_version); +} + +static ssize_t multicast_igmp_version_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version); +} +static DEVICE_ATTR_RW(multicast_igmp_version); + +static ssize_t multicast_last_member_count_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_last_member_count); +} + +static int set_last_member_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_count = val; + return 0; +} + +static ssize_t multicast_last_member_count_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_count); +} +static DEVICE_ATTR_RW(multicast_last_member_count); + +static ssize_t multicast_startup_query_count_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_startup_query_count); +} + +static int set_startup_query_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_count = val; + return 0; +} + +static ssize_t multicast_startup_query_count_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_count); +} +static DEVICE_ATTR_RW(multicast_startup_query_count); + +static ssize_t multicast_last_member_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_last_member_interval)); +} + +static int set_last_member_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_last_member_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_interval); +} +static DEVICE_ATTR_RW(multicast_last_member_interval); + +static ssize_t multicast_membership_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_membership_interval)); +} + +static int set_membership_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_membership_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_membership_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_membership_interval); +} +static DEVICE_ATTR_RW(multicast_membership_interval); + +static ssize_t multicast_querier_interval_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + 
jiffies_to_clock_t(br->multicast_querier_interval)); +} + +static int set_querier_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_querier_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_querier_interval_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_querier_interval); +} +static DEVICE_ATTR_RW(multicast_querier_interval); + +static ssize_t multicast_query_interval_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_interval)); +} + +static int set_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_query_interval_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_interval); +} +static DEVICE_ATTR_RW(multicast_query_interval); + +static ssize_t multicast_query_response_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_response_interval)); +} + +static int set_query_response_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_response_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_query_response_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_response_interval); +} +static DEVICE_ATTR_RW(multicast_query_response_interval); + +static ssize_t multicast_startup_query_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_startup_query_interval)); +} + +static int set_startup_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_startup_query_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_interval); +} +static DEVICE_ATTR_RW(multicast_startup_query_interval); + +static ssize_t multicast_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%d\n", + br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)); +} + +static int set_stats_enabled(struct net_bridge *br, unsigned long val) +{ + br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val); + return 0; +} + +static ssize_t multicast_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_stats_enabled); +} +static DEVICE_ATTR_RW(multicast_stats_enabled); + +#if IS_ENABLED(CONFIG_IPV6) +static ssize_t multicast_mld_version_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_mld_version); +} + +static ssize_t multicast_mld_version_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, 
buf, len, br_multicast_set_mld_version); +} +static DEVICE_ATTR_RW(multicast_mld_version); +#endif +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static ssize_t nf_call_iptables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES)); +} + +static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) +{ + br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val); + return 0; +} + +static ssize_t nf_call_iptables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_iptables); +} +static DEVICE_ATTR_RW(nf_call_iptables); + +static ssize_t nf_call_ip6tables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES)); +} + +static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) +{ + br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val); + return 0; +} + +static ssize_t nf_call_ip6tables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); +} +static DEVICE_ATTR_RW(nf_call_ip6tables); + +static ssize_t nf_call_arptables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES)); +} + +static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) +{ + br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val); + return 0; +} + +static ssize_t nf_call_arptables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_arptables); +} +static DEVICE_ATTR_RW(nf_call_arptables); +#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t vlan_filtering_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_ENABLED)); +} + +static ssize_t vlan_filtering_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR_RW(vlan_filtering); + +static ssize_t vlan_protocol_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); +} + +static ssize_t vlan_protocol_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_proto); +} +static DEVICE_ATTR_RW(vlan_protocol); + +static ssize_t default_pvid_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->default_pvid); +} + +static ssize_t default_pvid_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); +} +static DEVICE_ATTR_RW(default_pvid); + +static ssize_t vlan_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED)); +} + +static ssize_t 
vlan_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_stats); +} +static DEVICE_ATTR_RW(vlan_stats_enabled); + +static ssize_t vlan_stats_per_port_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)); +} + +static ssize_t vlan_stats_per_port_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_stats_per_port); +} +static DEVICE_ATTR_RW(vlan_stats_per_port); +#endif + +static struct attribute *bridge_attrs[] = { + &dev_attr_forward_delay.attr, + &dev_attr_hello_time.attr, + &dev_attr_max_age.attr, + &dev_attr_ageing_time.attr, + &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, + &dev_attr_priority.attr, + &dev_attr_bridge_id.attr, + &dev_attr_root_id.attr, + &dev_attr_root_path_cost.attr, + &dev_attr_root_port.attr, + &dev_attr_topology_change.attr, + &dev_attr_topology_change_detected.attr, + &dev_attr_hello_timer.attr, + &dev_attr_tcn_timer.attr, + &dev_attr_topology_change_timer.attr, + &dev_attr_gc_timer.attr, + &dev_attr_group_addr.attr, + &dev_attr_flush.attr, + &dev_attr_no_linklocal_learn.attr, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &dev_attr_multicast_router.attr, + &dev_attr_multicast_snooping.attr, + &dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, + &dev_attr_hash_elasticity.attr, + &dev_attr_hash_max.attr, + &dev_attr_multicast_last_member_count.attr, + &dev_attr_multicast_startup_query_count.attr, + &dev_attr_multicast_last_member_interval.attr, + &dev_attr_multicast_membership_interval.attr, + &dev_attr_multicast_querier_interval.attr, + &dev_attr_multicast_query_interval.attr, + &dev_attr_multicast_query_response_interval.attr, + &dev_attr_multicast_startup_query_interval.attr, + &dev_attr_multicast_stats_enabled.attr, + &dev_attr_multicast_igmp_version.attr, +#if IS_ENABLED(CONFIG_IPV6) + &dev_attr_multicast_mld_version.attr, +#endif +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + &dev_attr_nf_call_iptables.attr, + &dev_attr_nf_call_ip6tables.attr, + &dev_attr_nf_call_arptables.attr, +#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, + &dev_attr_vlan_protocol.attr, + &dev_attr_default_pvid.attr, + &dev_attr_vlan_stats_enabled.attr, + &dev_attr_vlan_stats_per_port.attr, +#endif + NULL +}; + +static const struct attribute_group bridge_group = { + .name = SYSFS_BRIDGE_ATTR, + .attrs = bridge_attrs, +}; + +/* + * Export the forwarding information table as a binary file + * The records are struct __fdb_entry. + * + * Returns the number of bytes read. + */ +static ssize_t brforward_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct net_bridge *br = to_bridge(dev); + int n; + + /* must read whole records */ + if (off % sizeof(struct __fdb_entry) != 0) + return -EINVAL; + + n = br_fdb_fillbuf(br, buf, + count / sizeof(struct __fdb_entry), + off / sizeof(struct __fdb_entry)); + + if (n > 0) + n *= sizeof(struct __fdb_entry); + + return n; +} + +static struct bin_attribute bridge_forward = { + .attr = { .name = SYSFS_BRIDGE_FDB, + .mode = 0444, }, + .read = brforward_read, +}; + +/* + * Add entries in sysfs onto the existing network class device + * for the bridge. 
+ * Adds a attribute group "bridge" containing tuning parameters. + * Binary attribute containing the forward table + * Sub directory to hold links to interfaces. + * + * Note: the ifobj exists only to be a subdirectory + * to hold links. The ifobj exists in same data structure + * as it's parent the bridge so reference counting works. + */ +int br_sysfs_addbr(struct net_device *dev) +{ + struct kobject *brobj = &dev->dev.kobj; + struct net_bridge *br = netdev_priv(dev); + int err; + + err = sysfs_create_group(brobj, &bridge_group); + if (err) { + pr_info("%s: can't create group %s/%s\n", + __func__, dev->name, bridge_group.name); + goto out1; + } + + err = sysfs_create_bin_file(brobj, &bridge_forward); + if (err) { + pr_info("%s: can't create attribute file %s/%s\n", + __func__, dev->name, bridge_forward.attr.name); + goto out2; + } + + br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); + if (!br->ifobj) { + pr_info("%s: can't add kobject (directory) %s/%s\n", + __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); + err = -ENOMEM; + goto out3; + } + return 0; + out3: + sysfs_remove_bin_file(&dev->dev.kobj, &bridge_forward); + out2: + sysfs_remove_group(&dev->dev.kobj, &bridge_group); + out1: + return err; + +} + +void br_sysfs_delbr(struct net_device *dev) +{ + struct kobject *kobj = &dev->dev.kobj; + struct net_bridge *br = netdev_priv(dev); + + kobject_put(br->ifobj); + sysfs_remove_bin_file(kobj, &bridge_forward); + sysfs_remove_group(kobj, &bridge_group); +} diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c new file mode 100644 index 000000000..5047e9c23 --- /dev/null +++ b/net/bridge/br_sysfs_if.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Sysfs attributes of bridge ports + * Linux ethernet bridge + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> +#include <linux/sched/signal.h> + +#include "br_private.h" + +struct brport_attribute { + struct attribute attr; + ssize_t (*show)(struct net_bridge_port *, char *); + int (*store)(struct net_bridge_port *, unsigned long); + int (*store_raw)(struct net_bridge_port *, char *); +}; + +#define BRPORT_ATTR_RAW(_name, _mode, _show, _store) \ +const struct brport_attribute brport_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode }, \ + .show = _show, \ + .store_raw = _store, \ +}; + +#define BRPORT_ATTR(_name, _mode, _show, _store) \ +const struct brport_attribute brport_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +#define BRPORT_ATTR_FLAG(_name, _mask) \ +static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \ +{ \ + return sprintf(buf, "%d\n", !!(p->flags & _mask)); \ +} \ +static int store_##_name(struct net_bridge_port *p, unsigned long v) \ +{ \ + return store_flag(p, v, _mask); \ +} \ +static BRPORT_ATTR(_name, 0644, \ + show_##_name, store_##_name) + +static int store_flag(struct net_bridge_port *p, unsigned long v, + unsigned long mask) +{ + unsigned long flags = p->flags; + int err; + + if (v) + flags |= mask; + else + flags &= ~mask; + + if (flags != p->flags) { + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + + p->flags = flags; + br_port_flags_change(p, mask); + } + return 0; +} + +static ssize_t show_path_cost(struct net_bridge_port *p, 
char *buf) +{ + return sprintf(buf, "%d\n", p->path_cost); +} + +static BRPORT_ATTR(path_cost, 0644, + show_path_cost, br_stp_set_path_cost); + +static ssize_t show_priority(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->priority); +} + +static BRPORT_ATTR(priority, 0644, + show_priority, br_stp_set_port_priority); + +static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) +{ + return br_show_bridge_id(buf, &p->designated_root); +} +static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL); + +static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) +{ + return br_show_bridge_id(buf, &p->designated_bridge); +} +static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL); + +static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->designated_port); +} +static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL); + +static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->designated_cost); +} +static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL); + +static ssize_t show_port_id(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "0x%x\n", p->port_id); +} +static BRPORT_ATTR(port_id, 0444, show_port_id, NULL); + +static ssize_t show_port_no(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "0x%x\n", p->port_no); +} + +static BRPORT_ATTR(port_no, 0444, show_port_no, NULL); + +static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->topology_change_ack); +} +static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL); + +static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->config_pending); +} +static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL); + +static ssize_t show_port_state(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->state); +} +static BRPORT_ATTR(state, 0444, show_port_state, NULL); + +static ssize_t show_message_age_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer)); +} +static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL); + +static ssize_t show_forward_delay_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer)); +} +static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL); + +static ssize_t show_hold_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer)); +} +static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL); + +static int store_flush(struct net_bridge_port *p, unsigned long v) +{ + br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry + return 0; +} +static BRPORT_ATTR(flush, 0200, NULL, store_flush); + +static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%#x\n", p->group_fwd_mask); +} + +static int store_group_fwd_mask(struct net_bridge_port *p, + unsigned long v) +{ + if (v & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = v; + + return 0; +} +static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask, + store_group_fwd_mask); + +static ssize_t show_backup_port(struct net_bridge_port *p, char *buf) +{ + struct net_bridge_port *backup_p; + int ret = 0; + + rcu_read_lock(); + 
backup_p = rcu_dereference(p->backup_port); + if (backup_p) + ret = sprintf(buf, "%s\n", backup_p->dev->name); + rcu_read_unlock(); + + return ret; +} + +static int store_backup_port(struct net_bridge_port *p, char *buf) +{ + struct net_device *backup_dev = NULL; + char *nl = strchr(buf, '\n'); + + if (nl) + *nl = '\0'; + + if (strlen(buf) > 0) { + backup_dev = __dev_get_by_name(dev_net(p->dev), buf); + if (!backup_dev) + return -ENOENT; + } + + return nbp_backup_change(p, backup_dev); +} +static BRPORT_ATTR_RAW(backup_port, 0644, show_backup_port, store_backup_port); + +BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); +BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); +BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); +BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); +BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); +BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); +BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); +BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS); +BRPORT_ATTR_FLAG(isolated, BR_ISOLATED); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->multicast_router); +} + +static int store_multicast_router(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_set_port_router(p, v); +} +static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, + store_multicast_router); + +BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); +BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); +#endif + +static const struct brport_attribute *brport_attrs[] = { + &brport_attr_path_cost, + &brport_attr_priority, + &brport_attr_port_id, + &brport_attr_port_no, + &brport_attr_designated_root, + &brport_attr_designated_bridge, + &brport_attr_designated_port, + &brport_attr_designated_cost, + &brport_attr_state, + &brport_attr_change_ack, + &brport_attr_config_pending, + &brport_attr_message_age_timer, + &brport_attr_forward_delay_timer, + &brport_attr_hold_timer, + &brport_attr_flush, + &brport_attr_hairpin_mode, + &brport_attr_bpdu_guard, + &brport_attr_root_block, + &brport_attr_learning, + &brport_attr_unicast_flood, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &brport_attr_multicast_router, + &brport_attr_multicast_fast_leave, + &brport_attr_multicast_to_unicast, +#endif + &brport_attr_proxyarp, + &brport_attr_proxyarp_wifi, + &brport_attr_multicast_flood, + &brport_attr_broadcast_flood, + &brport_attr_group_fwd_mask, + &brport_attr_neigh_suppress, + &brport_attr_isolated, + &brport_attr_backup_port, + NULL +}; + +#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) + +static ssize_t brport_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = kobj_to_brport(kobj); + + if (!brport_attr->show) + return -EINVAL; + + return brport_attr->show(p, buf); +} + +static ssize_t brport_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = kobj_to_brport(kobj); + ssize_t ret = -EINVAL; + unsigned long val; + char *endp; + + if (!ns_capable(dev_net(p->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (brport_attr->store_raw) { + char *buf_copy; + + buf_copy = kstrndup(buf, count, 
GFP_KERNEL); + if (!buf_copy) { + ret = -ENOMEM; + goto out_unlock; + } + spin_lock_bh(&p->br->lock); + ret = brport_attr->store_raw(p, buf_copy); + spin_unlock_bh(&p->br->lock); + kfree(buf_copy); + } else if (brport_attr->store) { + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + goto out_unlock; + spin_lock_bh(&p->br->lock); + ret = brport_attr->store(p, val); + spin_unlock_bh(&p->br->lock); + } + + if (!ret) { + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + ret = count; + } +out_unlock: + rtnl_unlock(); + + return ret; +} + +const struct sysfs_ops brport_sysfs_ops = { + .show = brport_show, + .store = brport_store, +}; + +/* + * Add sysfs entries to ethernet device added to a bridge. + * Creates a brport subdirectory with bridge attributes. + * Puts symlink in bridge's brif subdirectory + */ +int br_sysfs_addif(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + const struct brport_attribute **a; + int err; + + err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, + SYSFS_BRIDGE_PORT_LINK); + if (err) + return err; + + for (a = brport_attrs; *a; ++a) { + err = sysfs_create_file(&p->kobj, &((*a)->attr)); + if (err) + return err; + } + + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name); +} + +/* Rename bridge's brif symlink */ +int br_sysfs_renameif(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + int err; + + /* If a rename fails, the rollback will cause another + * rename call with the existing name. + */ + if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ)) + return 0; + + err = sysfs_rename_link(br->ifobj, &p->kobj, + p->sysfs_name, p->dev->name); + if (err) + netdev_notice(br->dev, "unable to rename link %s to %s", + p->sysfs_name, p->dev->name); + else + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + + return err; +} diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 000000000..1dc5db076 --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,2097 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/switchdev.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid); + +static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct net_bridge_vlan *vle = ptr; + u16 vid = *(u16 *)arg->key; + + return vle->vid != vid; +} + +static const struct rhashtable_params br_vlan_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, vnode), + .key_offset = offsetof(struct net_bridge_vlan, vid), + .key_len = sizeof(u16), + .nelem_hint = 3, + .max_size = VLAN_N_VID, + .obj_cmpfn = br_vlan_cmp, + .automatic_shrinking = true, +}; + +static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) +{ + return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); +} + +static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, + const struct net_bridge_vlan *v) +{ + if (vg->pvid == v->vid) + return false; + + smp_wmb(); + br_vlan_set_pvid_state(vg, v->state); + vg->pvid = v->vid; + + return true; +} + +static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) +{ + if (vg->pvid != vid) + return false; + + smp_wmb(); + vg->pvid = 0; + + return true; +} + +/* return true if anything changed, false otherwise */ +static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) +{ + struct 
net_bridge_vlan_group *vg; + u16 old_flags = v->flags; + bool ret; + + if (br_vlan_is_master(v)) + vg = br_vlan_group(v->br); + else + vg = nbp_vlan_group(v->port); + + if (flags & BRIDGE_VLAN_INFO_PVID) + ret = __vlan_add_pvid(vg, v); + else + ret = __vlan_delete_pvid(vg, v->vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; + else + v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; + + return ret || !!(old_flags ^ v->flags); +} + +static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, + struct net_bridge_vlan *v, u16 flags, + struct netlink_ext_ack *extack) +{ + int err; + + /* Try switchdev op first. In case it is not supported, fallback to + * 8021q add. + */ + err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack); + if (err == -EOPNOTSUPP) + return vlan_vid_add(dev, br->vlan_proto, v->vid); + v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV; + return err; +} + +static void __vlan_add_list(struct net_bridge_vlan *v) +{ + struct net_bridge_vlan_group *vg; + struct list_head *headp, *hpos; + struct net_bridge_vlan *vent; + + if (br_vlan_is_master(v)) + vg = br_vlan_group(v->br); + else + vg = nbp_vlan_group(v->port); + + headp = &vg->vlan_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct net_bridge_vlan, vlist); + if (v->vid < vent->vid) + continue; + else + break; + } + list_add_rcu(&v->vlist, hpos); +} + +static void __vlan_del_list(struct net_bridge_vlan *v) +{ + list_del_rcu(&v->vlist); +} + +static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, + const struct net_bridge_vlan *v) +{ + int err; + + /* Try switchdev op first. In case it is not supported, fallback to + * 8021q del. + */ + err = br_switchdev_port_vlan_del(dev, v->vid); + if (!(v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)) + vlan_vid_del(dev, br->vlan_proto, v->vid); + return err == -EOPNOTSUPP ? 0 : err; +} + +/* Returns a master vlan, if it didn't exist it gets created. In all cases + * a reference is taken to the master vlan before returning. 
+ */ +static struct net_bridge_vlan * +br_vlan_get_master(struct net_bridge *br, u16 vid, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *masterv; + + vg = br_vlan_group(br); + masterv = br_vlan_find(vg, vid); + if (!masterv) { + bool changed; + + /* missing global ctx, create it now */ + if (br_vlan_add(br, vid, 0, &changed, extack)) + return NULL; + masterv = br_vlan_find(vg, vid); + if (WARN_ON(!masterv)) + return NULL; + refcount_set(&masterv->refcnt, 1); + return masterv; + } + refcount_inc(&masterv->refcnt); + + return masterv; +} + +static void br_master_vlan_rcu_free(struct rcu_head *rcu) +{ + struct net_bridge_vlan *v; + + v = container_of(rcu, struct net_bridge_vlan, rcu); + WARN_ON(!br_vlan_is_master(v)); + free_percpu(v->stats); + v->stats = NULL; + kfree(v); +} + +static void br_vlan_put_master(struct net_bridge_vlan *masterv) +{ + struct net_bridge_vlan_group *vg; + + if (!br_vlan_is_master(masterv)) + return; + + vg = br_vlan_group(masterv->br); + if (refcount_dec_and_test(&masterv->refcnt)) { + rhashtable_remove_fast(&vg->vlan_hash, + &masterv->vnode, br_vlan_rht_params); + __vlan_del_list(masterv); + call_rcu(&masterv->rcu, br_master_vlan_rcu_free); + } +} + +static void nbp_vlan_rcu_free(struct rcu_head *rcu) +{ + struct net_bridge_vlan *v; + + v = container_of(rcu, struct net_bridge_vlan, rcu); + WARN_ON(br_vlan_is_master(v)); + /* if we had per-port stats configured then free them here */ + if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS) + free_percpu(v->stats); + v->stats = NULL; + kfree(v); +} + +/* This is the shared VLAN add function which works for both ports and bridge + * devices. There are four possible calls to this function in terms of the + * vlan entry type: + * 1. vlan is being added on a port (no master flags, global entry exists) + * 2. vlan is being added on a bridge (both master and brentry flags) + * 3. vlan is being added on a port, but a global entry didn't exist which + * is being created right now (master flag set, brentry flag unset), the + * global entry is used for global per-vlan features, but not for filtering + * 4. same as 3 but with both master and brentry flags set so the entry + * will be used for filtering in both the port and the bridge + */ +static int __vlan_add(struct net_bridge_vlan *v, u16 flags, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan *masterv = NULL; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_device *dev; + struct net_bridge *br; + int err; + + if (br_vlan_is_master(v)) { + br = v->br; + dev = br->dev; + vg = br_vlan_group(br); + } else { + p = v->port; + br = p->br; + dev = p->dev; + vg = nbp_vlan_group(p); + } + + if (p) { + /* Add VLAN to the device filter if it is supported. + * This ensures tagged traffic enters the bridge when + * promiscuous mode is disabled by br_manage_promisc(). 
+ */ + err = __vlan_vid_add(dev, br, v, flags, extack); + if (err) + goto out; + + /* need to work on the master vlan too */ + if (flags & BRIDGE_VLAN_INFO_MASTER) { + bool changed; + + err = br_vlan_add(br, v->vid, + flags | BRIDGE_VLAN_INFO_BRENTRY, + &changed, extack); + if (err) + goto out_filt; + + if (changed) + br_vlan_notify(br, NULL, v->vid, 0, + RTM_NEWVLAN); + } + + masterv = br_vlan_get_master(br, v->vid, extack); + if (!masterv) { + err = -ENOMEM; + goto out_filt; + } + v->brvlan = masterv; + if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { + v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + if (!v->stats) { + err = -ENOMEM; + goto out_filt; + } + v->priv_flags |= BR_VLFLAG_PER_PORT_STATS; + } else { + v->stats = masterv->stats; + } + } else { + err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack); + if (err && err != -EOPNOTSUPP) + goto out; + } + + /* Add the dev mac and count the vlan only if it's usable */ + if (br_vlan_should_use(v)) { + err = br_fdb_insert(br, p, dev->dev_addr, v->vid); + if (err) { + br_err(br, "failed insert local address into bridge forwarding table\n"); + goto out_filt; + } + vg->num_vlans++; + } + + /* set the state before publishing */ + v->state = BR_STATE_FORWARDING; + + err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode, + br_vlan_rht_params); + if (err) + goto out_fdb_insert; + + __vlan_add_list(v); + __vlan_add_flags(v, flags); + + if (p) + nbp_vlan_set_vlan_dev_state(p, v->vid); +out: + return err; + +out_fdb_insert: + if (br_vlan_should_use(v)) { + br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid); + vg->num_vlans--; + } + +out_filt: + if (p) { + __vlan_vid_del(dev, br, v); + if (masterv) { + if (v->stats && masterv->stats != v->stats) + free_percpu(v->stats); + v->stats = NULL; + + br_vlan_put_master(masterv); + v->brvlan = NULL; + } + } else { + br_switchdev_port_vlan_del(dev, v->vid); + } + + goto out; +} + +static int __vlan_del(struct net_bridge_vlan *v) +{ + struct net_bridge_vlan *masterv = v; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + int err = 0; + + if (br_vlan_is_master(v)) { + vg = br_vlan_group(v->br); + } else { + p = v->port; + vg = nbp_vlan_group(v->port); + masterv = v->brvlan; + } + + __vlan_delete_pvid(vg, v->vid); + if (p) { + err = __vlan_vid_del(p->dev, p->br, v); + if (err) + goto out; + } else { + err = br_switchdev_port_vlan_del(v->br->dev, v->vid); + if (err && err != -EOPNOTSUPP) + goto out; + err = 0; + } + + if (br_vlan_should_use(v)) { + v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY; + vg->num_vlans--; + } + + if (masterv != v) { + vlan_tunnel_info_del(vg, v); + rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, + br_vlan_rht_params); + __vlan_del_list(v); + nbp_vlan_set_vlan_dev_state(p, v->vid); + call_rcu(&v->rcu, nbp_vlan_rcu_free); + } + + br_vlan_put_master(masterv); +out: + return err; +} + +static void __vlan_group_free(struct net_bridge_vlan_group *vg) +{ + WARN_ON(!list_empty(&vg->vlan_list)); + rhashtable_destroy(&vg->vlan_hash); + vlan_tunnel_deinit(vg); + kfree(vg); +} + +static void __vlan_flush(const struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vlan, *tmp; + u16 v_start = 0, v_end = 0; + + __vlan_delete_pvid(vg, vg->pvid); + list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) { + /* take care of disjoint ranges */ + if (!v_start) { + v_start = vlan->vid; + } else if (vlan->vid - v_end != 1) { + /* found range end, notify and start next one */ + 
br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN); + v_start = vlan->vid; + } + v_end = vlan->vid; + + __vlan_del(vlan); + } + + /* notify about the last/whole vlan range */ + if (v_start) + br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN); +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_vlan_group *vg, + struct sk_buff *skb) +{ + struct br_vlan_stats *stats; + struct net_bridge_vlan *v; + u16 vid; + + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + goto out; + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id has untagged flag set, + * send untagged; otherwise, send tagged. + */ + br_vlan_get_tag(skb, &vid); + v = br_vlan_find(vg, vid); + /* Vlan entry must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!v || !br_vlan_should_use(v)) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_bytes += skb->len; + stats->tx_packets++; + u64_stats_update_end(&stats->syncp); + } + + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + __vlan_hwaccel_clear_tag(skb); + + if (p && (p->flags & BR_VLAN_TUNNEL) && + br_handle_egress_vlan_tunnel(skb, v)) { + kfree_skb(skb); + return NULL; + } +out: + return skb; +} + +/* Called under RCU */ +static bool __allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, + struct sk_buff *skb, u16 *vid, + u8 *state) +{ + struct br_vlan_stats *stats; + struct net_bridge_vlan *v; + bool tagged; + + BR_INPUT_SKB_CB(skb)->vlan_filtered = true; + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!skb_vlan_tag_present(skb) && + skb->protocol == br->vlan_proto)) { + skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + + if (!br_vlan_get_tag(skb, vid)) { + /* Tagged frame */ + if (skb->vlan_proto != br->vlan_proto) { + /* Protocol-mismatch, empty out vlan_tci for new tag */ + skb_push(skb, ETH_HLEN); + skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, + skb_vlan_tag_get(skb)); + if (unlikely(!skb)) + return false; + + skb_pull(skb, ETH_HLEN); + skb_reset_mac_len(skb); + *vid = 0; + tagged = false; + } else { + tagged = true; + } + } else { + /* Untagged frame */ + tagged = false; + } + + if (!*vid) { + u16 pvid = br_get_pvid(vg); + + /* Frame had a tag with VID 0 or did not have a tag. + * See if pvid is set on this port. That tells us which + * vlan untagged or priority-tagged traffic belongs to. + */ + if (!pvid) + goto drop; + + /* PVID is set on this port. Any untagged or priority-tagged + * ingress frame is considered to belong to this vlan. + */ + *vid = pvid; + if (likely(!tagged)) + /* Untagged Frame. */ + __vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid); + else + /* Priority-tagged Frame. + * At this point, we know that skb->vlan_tci VID + * field was 0. + * We update only VID field and preserve PCP field. 
+ */ + skb->vlan_tci |= pvid; + + /* if stats are disabled we can avoid the lookup */ + if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { + if (*state == BR_STATE_FORWARDING) { + *state = br_vlan_get_pvid_state(vg); + if (!br_vlan_state_allowed(*state, true)) + goto drop; + } + return true; + } + } + v = br_vlan_find(vg, *vid); + if (!v || !br_vlan_should_use(v)) + goto drop; + + if (*state == BR_STATE_FORWARDING) { + *state = br_vlan_get_state(v); + if (!br_vlan_state_allowed(*state, true)) + goto drop; + } + + if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { + stats = this_cpu_ptr(v->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += skb->len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); + } + + return true; + +drop: + kfree_skb(skb); + return false; +} + +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid, u8 *state) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br_opt_get(br, BROPT_VLAN_ENABLED)) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; + return true; + } + + return __allowed_ingress(br, vg, skb, vid, state); +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge_vlan_group *vg, + const struct sk_buff *skb) +{ + const struct net_bridge_vlan *v; + u16 vid; + + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + return true; + + br_vlan_get_tag(skb, &vid); + v = br_vlan_find(vg, vid); + if (v && br_vlan_should_use(v) && + br_vlan_state_allowed(br_vlan_get_state(v), false)) + return true; + + return false; +} + +/* Called under RCU */ +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge *br = p->br; + struct net_bridge_vlan *v; + + /* If filtering was disabled at input, let it pass. */ + if (!br_opt_get(br, BROPT_VLAN_ENABLED)) + return true; + + vg = nbp_vlan_group_rcu(p); + if (!vg || !vg->num_vlans) + return false; + + if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) + *vid = 0; + + if (!*vid) { + *vid = br_get_pvid(vg); + if (!*vid || + !br_vlan_state_allowed(br_vlan_get_pvid_state(vg), true)) + return false; + + return true; + } + + v = br_vlan_find(vg, *vid); + if (v && br_vlan_state_allowed(br_vlan_get_state(v), true)) + return true; + + return false; +} + +static int br_vlan_add_existing(struct net_bridge *br, + struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan, + u16 flags, bool *changed, + struct netlink_ext_ack *extack) +{ + int err; + + err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack); + if (err && err != -EOPNOTSUPP) + return err; + + if (!br_vlan_is_brentry(vlan)) { + /* Trying to change flags of non-existent bridge vlan */ + if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) { + err = -EINVAL; + goto err_flags; + } + /* It was only kept for port vlans, now make it real */ + err = br_fdb_insert(br, NULL, br->dev->dev_addr, + vlan->vid); + if (err) { + br_err(br, "failed to insert local address into bridge forwarding table\n"); + goto err_fdb_insert; + } + + refcount_inc(&vlan->refcnt); + vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; + vg->num_vlans++; + *changed = true; + } + + if (__vlan_add_flags(vlan, flags)) + *changed = true; + + return 0; + +err_fdb_insert: +err_flags: + br_switchdev_port_vlan_del(br->dev, vlan->vid); + return err; +} + +/* Must be protected by RTNL. 
+ * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated + */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + int ret; + + ASSERT_RTNL(); + + *changed = false; + vg = br_vlan_group(br); + vlan = br_vlan_find(vg, vid); + if (vlan) + return br_vlan_add_existing(br, vg, vlan, flags, changed, + extack); + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + if (!vlan->stats) { + kfree(vlan); + return -ENOMEM; + } + vlan->vid = vid; + vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; + vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; + vlan->br = br; + if (flags & BRIDGE_VLAN_INFO_BRENTRY) + refcount_set(&vlan->refcnt, 1); + ret = __vlan_add(vlan, flags, extack); + if (ret) { + free_percpu(vlan->stats); + kfree(vlan); + } else { + *changed = true; + } + + return ret; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + vg = br_vlan_group(br); + v = br_vlan_find(vg, vid); + if (!v || !br_vlan_is_brentry(v)) + return -ENOENT; + + br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); + br_fdb_delete_by_port(br, NULL, vid, 0); + + vlan_tunnel_info_del(vg, v); + + return __vlan_del(v); +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + + vg = br_vlan_group(br); + __vlan_flush(br, NULL, vg); + RCU_INIT_POINTER(br->vlgrp, NULL); + synchronize_rcu(); + __vlan_group_free(vg); +} + +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid) +{ + if (!vg) + return NULL; + + return br_vlan_lookup(&vg->vlan_hash, vid); +} + +/* Must be protected by RTNL. */ +static void recalculate_group_addr(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_GROUP_ADDR_SET)) + return; + + spin_lock_bh(&br->lock); + if (!br_opt_get(br, BROPT_VLAN_ENABLED) || + br->vlan_proto == htons(ETH_P_8021Q)) { + /* Bridge Group Address */ + br->group_addr[5] = 0x00; + } else { /* vlan_enabled && ETH_P_8021AD */ + /* Provider Bridge Group Address */ + br->group_addr[5] = 0x08; + } + spin_unlock_bh(&br->lock); +} + +/* Must be protected by RTNL. 
*/ +void br_recalculate_fwd_mask(struct net_bridge *br) +{ + if (!br_opt_get(br, BROPT_VLAN_ENABLED) || + br->vlan_proto == htons(ETH_P_8021Q)) + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + else /* vlan_enabled && ETH_P_8021AD */ + br->group_fwd_mask_required = BR_GROUPFWD_8021AD & + ~(1u << br->group_addr[5]); +} + +int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = val, + }; + int err; + + if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val) + return 0; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + + br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val); + br_manage_promisc(br); + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + + return 0; +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + return __br_vlan_filter_toggle(br, val); +} + +bool br_vlan_enabled(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_opt_get(br, BROPT_VLAN_ENABLED); +} +EXPORT_SYMBOL_GPL(br_vlan_enabled); + +int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto) +{ + struct net_bridge *br = netdev_priv(dev); + + *p_proto = ntohs(br->vlan_proto); + + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_proto); + +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) +{ + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_protocol = ntohs(proto), + }; + int err = 0; + struct net_bridge_port *p; + struct net_bridge_vlan *vlan; + struct net_bridge_vlan_group *vg; + __be16 oldproto = br->vlan_proto; + + if (br->vlan_proto == proto) + return 0; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + + /* Add VLANs for the new proto to the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; + err = vlan_vid_add(p->dev, proto, vlan->vid); + if (err) + goto err_filt; + } + } + + br->vlan_proto = proto; + + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + + /* Delete VLANs for the old proto from the device filter. 
*/ + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; + vlan_vid_del(p->dev, oldproto, vlan->vid); + } + } + + return 0; + +err_filt: + attr.u.vlan_protocol = ntohs(oldproto); + switchdev_port_attr_set(br->dev, &attr); + + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; + vlan_vid_del(p->dev, proto, vlan->vid); + } + + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; + vlan_vid_del(p->dev, proto, vlan->vid); + } + } + + return err; +} + +int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +{ + if (val != ETH_P_8021Q && val != ETH_P_8021AD) + return -EPROTONOSUPPORT; + + return __br_vlan_set_proto(br, htons(val)); +} + +int br_vlan_set_stats(struct net_bridge *br, unsigned long val) +{ + switch (val) { + case 0: + case 1: + br_opt_toggle(br, BROPT_VLAN_STATS_ENABLED, !!val); + break; + default: + return -EINVAL; + } + + return 0; +} + +int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val) +{ + struct net_bridge_port *p; + + /* allow to change the option if there are no port vlans configured */ + list_for_each_entry(p, &br->port_list, list) { + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); + + if (vg->num_vlans) + return -EBUSY; + } + + switch (val) { + case 0: + case 1: + br_opt_toggle(br, BROPT_VLAN_STATS_PER_PORT, !!val); + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) +{ + struct net_bridge_vlan *v; + + if (vid != vg->pvid) + return false; + + v = br_vlan_lookup(&vg->vlan_hash, vid); + if (v && br_vlan_should_use(v) && + (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)) + return true; + + return false; +} + +static void br_vlan_disable_default_pvid(struct net_bridge *br) +{ + struct net_bridge_port *p; + u16 pvid = br->default_pvid; + + /* Disable default_pvid on all ports where it is still + * configured. + */ + if (vlan_default_pvid(br_vlan_group(br), pvid)) { + if (!br_vlan_delete(br, pvid)) + br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN); + } + + list_for_each_entry(p, &br->port_list, list) { + if (vlan_default_pvid(nbp_vlan_group(p), pvid) && + !nbp_vlan_delete(p, pvid)) + br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN); + } + + br->default_pvid = 0; +} + +int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid, + struct netlink_ext_ack *extack) +{ + const struct net_bridge_vlan *pvent; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + unsigned long *changed; + bool vlchange; + u16 old_pvid; + int err = 0; + + if (!pvid) { + br_vlan_disable_default_pvid(br); + return 0; + } + + changed = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL); + if (!changed) + return -ENOMEM; + + old_pvid = br->default_pvid; + + /* Update default_pvid config only if we do not conflict with + * user configuration. 
+ */ + vg = br_vlan_group(br); + pvent = br_vlan_find(vg, pvid); + if ((!old_pvid || vlan_default_pvid(vg, old_pvid)) && + (!pvent || !br_vlan_should_use(pvent))) { + err = br_vlan_add(br, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange, extack); + if (err) + goto out; + + if (br_vlan_delete(br, old_pvid)) + br_vlan_notify(br, NULL, old_pvid, 0, RTM_DELVLAN); + br_vlan_notify(br, NULL, pvid, 0, RTM_NEWVLAN); + set_bit(0, changed); + } + + list_for_each_entry(p, &br->port_list, list) { + /* Update default_pvid config only if we do not conflict with + * user configuration. + */ + vg = nbp_vlan_group(p); + if ((old_pvid && + !vlan_default_pvid(vg, old_pvid)) || + br_vlan_find(vg, pvid)) + continue; + + err = nbp_vlan_add(p, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange, extack); + if (err) + goto err_port; + if (nbp_vlan_delete(p, old_pvid)) + br_vlan_notify(br, p, old_pvid, 0, RTM_DELVLAN); + br_vlan_notify(p->br, p, pvid, 0, RTM_NEWVLAN); + set_bit(p->port_no, changed); + } + + br->default_pvid = pvid; + +out: + bitmap_free(changed); + return err; + +err_port: + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!test_bit(p->port_no, changed)) + continue; + + if (old_pvid) { + nbp_vlan_add(p, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange, NULL); + br_vlan_notify(p->br, p, old_pvid, 0, RTM_NEWVLAN); + } + nbp_vlan_delete(p, pvid); + br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN); + } + + if (test_bit(0, changed)) { + if (old_pvid) { + br_vlan_add(br, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange, NULL); + br_vlan_notify(br, NULL, old_pvid, 0, RTM_NEWVLAN); + } + br_vlan_delete(br, pvid); + br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN); + } + goto out; +} + +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) +{ + u16 pvid = val; + int err = 0; + + if (val >= VLAN_VID_MASK) + return -EINVAL; + + if (pvid == br->default_pvid) + goto out; + + /* Only allow default pvid change when filtering is disabled */ + if (br_opt_get(br, BROPT_VLAN_ENABLED)) { + pr_info_once("Please disable vlan filtering to change default_pvid\n"); + err = -EPERM; + goto out; + } + err = __br_vlan_set_default_pvid(br, pvid, NULL); +out: + return err; +} + +int br_vlan_init(struct net_bridge *br) +{ + struct net_bridge_vlan_group *vg; + int ret = -ENOMEM; + + vg = kzalloc(sizeof(*vg), GFP_KERNEL); + if (!vg) + goto out; + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); + if (ret) + goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; + INIT_LIST_HEAD(&vg->vlan_list); + br->vlan_proto = htons(ETH_P_8021Q); + br->default_pvid = 1; + rcu_assign_pointer(br->vlgrp, vg); + +out: + return ret; + +err_tunnel_init: + rhashtable_destroy(&vg->vlan_hash); +err_rhtbl: + kfree(vg); + + goto out; +} + +int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack) +{ + struct switchdev_attr attr = { + .orig_dev = p->br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = br_opt_get(p->br, BROPT_VLAN_ENABLED), + }; + struct net_bridge_vlan_group *vg; + int ret = -ENOMEM; + + vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL); + if (!vg) + goto out; + + ret = switchdev_port_attr_set(p->dev, &attr); + if (ret && ret != -EOPNOTSUPP) + goto err_vlan_enabled; + + ret = rhashtable_init(&vg->vlan_hash, 
&br_vlan_rht_params); + if (ret) + goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; + INIT_LIST_HEAD(&vg->vlan_list); + rcu_assign_pointer(p->vlgrp, vg); + if (p->br->default_pvid) { + bool changed; + + ret = nbp_vlan_add(p, p->br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED, + &changed, extack); + if (ret) + goto err_vlan_add; + br_vlan_notify(p->br, p, p->br->default_pvid, 0, RTM_NEWVLAN); + } +out: + return ret; + +err_vlan_add: + RCU_INIT_POINTER(p->vlgrp, NULL); + synchronize_rcu(); + vlan_tunnel_deinit(vg); +err_tunnel_init: + rhashtable_destroy(&vg->vlan_hash); +err_rhtbl: +err_vlan_enabled: + kfree(vg); + + goto out; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated + */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed, struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan *vlan; + int ret; + + ASSERT_RTNL(); + + *changed = false; + vlan = br_vlan_find(nbp_vlan_group(port), vid); + if (vlan) { + /* Pass the flags to the hardware bridge */ + ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack); + if (ret && ret != -EOPNOTSUPP) + return ret; + *changed = __vlan_add_flags(vlan, flags); + + return 0; + } + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + vlan->vid = vid; + vlan->port = port; + ret = __vlan_add(vlan, flags, extack); + if (ret) + kfree(vlan); + else + *changed = true; + + return ret; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + v = br_vlan_find(nbp_vlan_group(port), vid); + if (!v) + return -ENOENT; + br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); + br_fdb_delete_by_port(port->br, port, vid, 0); + + return __vlan_del(v); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + __vlan_flush(port->br, port, vg); + RCU_INIT_POINTER(port->vlgrp, NULL); + synchronize_rcu(); + __vlan_group_free(vg); +} + +void br_vlan_get_stats(const struct net_bridge_vlan *v, + struct br_vlan_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, txpackets, txbytes; + struct br_vlan_stats *cpu_stats; + unsigned int start; + + cpu_stats = per_cpu_ptr(v->stats, i); + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rxpackets = cpu_stats->rx_packets; + rxbytes = cpu_stats->rx_bytes; + txbytes = cpu_stats->tx_bytes; + txpackets = cpu_stats->tx_packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->tx_bytes += txbytes; + stats->tx_packets += txpackets; + } +} + +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); + if (p) + vg = nbp_vlan_group(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_pvid); + +int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; 
+ + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); + +int br_vlan_get_info(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); + if (p) + vg = nbp_vlan_group(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group(netdev_priv(dev)); + else + return -EINVAL; + + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + p_vinfo->vid = vid; + p_vinfo->flags = v->flags; + if (vid == br_get_pvid(vg)) + p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID; + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_info); + +static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) +{ + return is_vlan_dev(dev) && + !!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING); +} + +static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, + __always_unused struct netdev_nested_priv *priv) +{ + return br_vlan_is_bind_vlan_dev(dev); +} + +static bool br_vlan_has_upper_bind_vlan_dev(struct net_device *dev) +{ + int found; + + rcu_read_lock(); + found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn, + NULL); + rcu_read_unlock(); + + return !!found; +} + +struct br_vlan_bind_walk_data { + u16 vid; + struct net_device *result; +}; + +static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct br_vlan_bind_walk_data *data = priv->data; + int found = 0; + + if (br_vlan_is_bind_vlan_dev(dev) && + vlan_dev_priv(dev)->vlan_id == data->vid) { + data->result = dev; + found = 1; + } + + return found; +} + +static struct net_device * +br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) +{ + struct br_vlan_bind_walk_data data = { + .vid = vid, + }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; + + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, + &priv); + rcu_read_unlock(); + + return data.result; +} + +static bool br_vlan_is_dev_up(const struct net_device *dev) +{ + return !!(dev->flags & IFF_UP) && netif_oper_up(dev); +} + +static void br_vlan_set_vlan_dev_state(const struct net_bridge *br, + struct net_device *vlan_dev) +{ + u16 vid = vlan_dev_priv(vlan_dev)->vlan_id; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + bool has_carrier = false; + + if (!netif_carrier_ok(br->dev)) { + netif_carrier_off(vlan_dev); + return; + } + + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) { + has_carrier = true; + break; + } + } + + if (has_carrier) + netif_carrier_on(vlan_dev); + else + netif_carrier_off(vlan_dev); +} + +static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p) +{ + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); + struct net_bridge_vlan *vlan; + struct net_device *vlan_dev; + + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, + vlan->vid); + if (vlan_dev) { + if (br_vlan_is_dev_up(p->dev)) { + if (netif_carrier_ok(p->br->dev)) + netif_carrier_on(vlan_dev); + } else { + br_vlan_set_vlan_dev_state(p->br, vlan_dev); + } + } + } +} + +static void br_vlan_upper_change(struct net_device *dev, + struct net_device 
*upper_dev, + bool linking) +{ + struct net_bridge *br = netdev_priv(dev); + + if (!br_vlan_is_bind_vlan_dev(upper_dev)) + return; + + if (linking) { + br_vlan_set_vlan_dev_state(br, upper_dev); + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true); + } else { + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, + br_vlan_has_upper_bind_vlan_dev(dev)); + } +} + +struct br_vlan_link_state_walk_data { + struct net_bridge *br; +}; + +static int br_vlan_link_state_change_fn(struct net_device *vlan_dev, + struct netdev_nested_priv *priv) +{ + struct br_vlan_link_state_walk_data *data = priv->data; + + if (br_vlan_is_bind_vlan_dev(vlan_dev)) + br_vlan_set_vlan_dev_state(data->br, vlan_dev); + + return 0; +} + +static void br_vlan_link_state_change(struct net_device *dev, + struct net_bridge *br) +{ + struct br_vlan_link_state_walk_data data = { + .br = br + }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; + + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn, + &priv); + rcu_read_unlock(); +} + +/* Must be protected by RTNL. */ +static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid) +{ + struct net_device *vlan_dev; + + if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING)) + return; + + vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, vid); + if (vlan_dev) + br_vlan_set_vlan_dev_state(p->br, vlan_dev); +} + +/* Must be protected by RTNL. */ +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info; + struct net_bridge *br = netdev_priv(dev); + int vlcmd = 0, ret = 0; + bool changed = false; + + switch (event) { + case NETDEV_REGISTER: + ret = br_vlan_add(br, br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL); + vlcmd = RTM_NEWVLAN; + break; + case NETDEV_UNREGISTER: + changed = !br_vlan_delete(br, br->default_pvid); + vlcmd = RTM_DELVLAN; + break; + case NETDEV_CHANGEUPPER: + info = ptr; + br_vlan_upper_change(dev, info->upper_dev, info->linking); + break; + + case NETDEV_CHANGE: + case NETDEV_UP: + if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING)) + break; + br_vlan_link_state_change(dev, br); + break; + } + if (changed) + br_vlan_notify(br, NULL, br->default_pvid, 0, vlcmd); + + return ret; +} + +/* Must be protected by RTNL. 
*/ +void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) +{ + if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING)) + return; + + switch (event) { + case NETDEV_CHANGE: + case NETDEV_DOWN: + case NETDEV_UP: + br_vlan_set_all_vlan_dev_state(p); + break; + } +} + +static bool br_vlan_stats_fill(struct sk_buff *skb, + const struct net_bridge_vlan *v) +{ + struct br_vlan_stats stats; + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_STATS); + if (!nest) + return false; + + br_vlan_get_stats(v, &stats); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, + stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, + stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + goto out_err; + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + +/* v_opts is used to dump the options which must be equal in the whole range */ +static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range, + const struct net_bridge_vlan *v_opts, + u16 flags, + bool dump_stats) +{ + struct bridge_vlan_info info; + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY); + if (!nest) + return false; + + memset(&info, 0, sizeof(info)); + info.vid = vid; + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + info.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + if (flags & BRIDGE_VLAN_INFO_PVID) + info.flags |= BRIDGE_VLAN_INFO_PVID; + + if (nla_put(skb, BRIDGE_VLANDB_ENTRY_INFO, sizeof(info), &info)) + goto out_err; + + if (vid_range && vid < vid_range && + !(flags & BRIDGE_VLAN_INFO_PVID) && + nla_put_u16(skb, BRIDGE_VLANDB_ENTRY_RANGE, vid_range)) + goto out_err; + + if (v_opts) { + if (!br_vlan_opts_fill(skb, v_opts)) + goto out_err; + + if (dump_stats && !br_vlan_stats_fill(skb, v_opts)) + goto out_err; + } + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + +static size_t rtnl_vlan_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_vlan_msg)) + + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY */ + + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_ENTRY_RANGE */ + + nla_total_size(sizeof(struct bridge_vlan_info)) /* BRIDGE_VLANDB_ENTRY_INFO */ + + br_vlan_opts_nl_size(); /* bridge vlan options */ +} + +void br_vlan_notify(const struct net_bridge *br, + const struct net_bridge_port *p, + u16 vid, u16 vid_range, + int cmd) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v = NULL; + struct br_vlan_msg *bvm; + struct nlmsghdr *nlh; + struct sk_buff *skb; + int err = -ENOBUFS; + struct net *net; + u16 flags = 0; + int ifindex; + + /* right now notifications are done only with rtnl held */ + ASSERT_RTNL(); + + if (p) { + ifindex = p->dev->ifindex; + vg = nbp_vlan_group(p); + net = dev_net(p->dev); + } else { + ifindex = br->dev->ifindex; + vg = br_vlan_group(br); + net = dev_net(br->dev); + } + + skb = nlmsg_new(rtnl_vlan_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out_err; + + err = -EMSGSIZE; + nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*bvm), 0); + if (!nlh) + goto out_err; + bvm = nlmsg_data(nlh); + memset(bvm, 0, sizeof(*bvm)); + bvm->family = AF_BRIDGE; + bvm->ifindex = ifindex; + + switch (cmd) { + case RTM_NEWVLAN: + /* need to find the vlan due to flags/options */ + v = br_vlan_find(vg, vid); + if (!v || 
!br_vlan_should_use(v)) + goto out_kfree; + + flags = v->flags; + if (br_get_pvid(vg) == v->vid) + flags |= BRIDGE_VLAN_INFO_PVID; + break; + case RTM_DELVLAN: + break; + default: + goto out_kfree; + } + + if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false)) + goto out_err; + + nlmsg_end(skb, nlh); + rtnl_notify(skb, net, 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL); + return; + +out_err: + rtnl_set_sk_err(net, RTNLGRP_BRVLAN, err); +out_kfree: + kfree_skb(skb); +} + +/* check if v_curr can enter a range ending in range_end */ +bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return v_curr->vid - range_end->vid == 1 && + range_end->flags == v_curr->flags && + br_vlan_opts_eq_range(v_curr, range_end); +} + +static int br_vlan_dump_dev(const struct net_device *dev, + struct sk_buff *skb, + struct netlink_callback *cb, + u32 dump_flags) +{ + struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL; + bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS); + struct net_bridge_vlan_group *vg; + int idx = 0, s_idx = cb->args[1]; + struct nlmsghdr *nlh = NULL; + struct net_bridge_port *p; + struct br_vlan_msg *bvm; + struct net_bridge *br; + int err = 0; + u16 pvid; + + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) + return -EINVAL; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + vg = br_vlan_group_rcu(br); + p = NULL; + } else { + p = br_port_get_rcu(dev); + if (WARN_ON(!p)) + return -EINVAL; + vg = nbp_vlan_group_rcu(p); + br = p->br; + } + + if (!vg) + return 0; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWVLAN, sizeof(*bvm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + bvm = nlmsg_data(nlh); + memset(bvm, 0, sizeof(*bvm)); + bvm->family = PF_BRIDGE; + bvm->ifindex = dev->ifindex; + pvid = br_get_pvid(vg); + + /* idx must stay at range's beginning until it is filled in */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + if (idx < s_idx) { + idx++; + continue; + } + + if (!range_start) { + range_start = v; + range_end = v; + continue; + } + + if (dump_stats || v->vid == pvid || + !br_vlan_can_enter_range(v, range_end)) { + u16 vlan_flags = br_vlan_flags(range_start, pvid); + + if (!br_vlan_fill_vids(skb, range_start->vid, + range_end->vid, range_start, + vlan_flags, dump_stats)) { + err = -EMSGSIZE; + break; + } + /* advance number of filled vlans */ + idx += range_end->vid - range_start->vid + 1; + + range_start = v; + } + range_end = v; + } + + /* err will be 0 and range_start will be set in 3 cases here: + * - first vlan (range_start == range_end) + * - last vlan (range_start == range_end, not in range) + * - last vlan range (range_start != range_end, in range) + */ + if (!err && range_start && + !br_vlan_fill_vids(skb, range_start->vid, range_end->vid, + range_start, br_vlan_flags(range_start, pvid), + dump_stats)) + err = -EMSGSIZE; + + cb->args[1] = err ? 
idx : 0; + + nlmsg_end(skb, nlh); + + return err; +} + +static const struct nla_policy br_vlan_db_dump_pol[BRIDGE_VLANDB_DUMP_MAX + 1] = { + [BRIDGE_VLANDB_DUMP_FLAGS] = { .type = NLA_U32 }, +}; + +static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *dtb[BRIDGE_VLANDB_DUMP_MAX + 1]; + int idx = 0, err = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + struct br_vlan_msg *bvm; + struct net_device *dev; + u32 dump_flags = 0; + + err = nlmsg_parse(cb->nlh, sizeof(*bvm), dtb, BRIDGE_VLANDB_DUMP_MAX, + br_vlan_db_dump_pol, cb->extack); + if (err < 0) + return err; + + bvm = nlmsg_data(cb->nlh); + if (dtb[BRIDGE_VLANDB_DUMP_FLAGS]) + dump_flags = nla_get_u32(dtb[BRIDGE_VLANDB_DUMP_FLAGS]); + + rcu_read_lock(); + if (bvm->ifindex) { + dev = dev_get_by_index_rcu(net, bvm->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) + goto out_err; + } else { + for_each_netdev_rcu(net, dev) { + if (idx < s_idx) + goto skip; + + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); + if (err == -EMSGSIZE) + break; +skip: + idx++; + } + } + cb->args[0] = idx; + rcu_read_unlock(); + + return skb->len; + +out_err: + rcu_read_unlock(); + + return err; +} + +static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = { + [BRIDGE_VLANDB_ENTRY_INFO] = + NLA_POLICY_EXACT_LEN(sizeof(struct bridge_vlan_info)), + [BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 }, + [BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED }, +}; + +static int br_vlan_rtm_process_one(struct net_device *dev, + const struct nlattr *attr, + int cmd, struct netlink_ext_ack *extack) +{ + struct bridge_vlan_info *vinfo, vrange_end, *vinfo_last = NULL; + struct nlattr *tb[BRIDGE_VLANDB_ENTRY_MAX + 1]; + bool changed = false, skip_processing = false; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + int err = 0, cmdmap = 0; + struct net_bridge *br; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } else { + p = br_port_get_rtnl(dev); + if (WARN_ON(!p)) + return -ENODEV; + br = p->br; + vg = nbp_vlan_group(p); + } + + if (WARN_ON(!vg)) + return -ENODEV; + + err = nla_parse_nested(tb, BRIDGE_VLANDB_ENTRY_MAX, attr, + br_vlan_db_policy, extack); + if (err) + return err; + + if (!tb[BRIDGE_VLANDB_ENTRY_INFO]) { + NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry info"); + return -EINVAL; + } + memset(&vrange_end, 0, sizeof(vrange_end)); + + vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]); + if (vinfo->flags & (BRIDGE_VLAN_INFO_RANGE_BEGIN | + BRIDGE_VLAN_INFO_RANGE_END)) { + NL_SET_ERR_MSG_MOD(extack, "Old-style vlan ranges are not allowed when using RTM vlan calls"); + return -EINVAL; + } + if (!br_vlan_valid_id(vinfo->vid, extack)) + return -EINVAL; + + if (tb[BRIDGE_VLANDB_ENTRY_RANGE]) { + vrange_end.vid = nla_get_u16(tb[BRIDGE_VLANDB_ENTRY_RANGE]); + /* validate user-provided flags without RANGE_BEGIN */ + vrange_end.flags = BRIDGE_VLAN_INFO_RANGE_END | vinfo->flags; + vinfo->flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + + /* vinfo_last is the range start, vinfo the range end */ + vinfo_last = vinfo; + vinfo = &vrange_end; + + if (!br_vlan_valid_id(vinfo->vid, extack) || + !br_vlan_valid_range(vinfo, vinfo_last, extack)) + return -EINVAL; + } + + switch (cmd) { + case RTM_NEWVLAN: + cmdmap = RTM_SETLINK; + 
skip_processing = !!(vinfo->flags & BRIDGE_VLAN_INFO_ONLY_OPTS); + break; + case RTM_DELVLAN: + cmdmap = RTM_DELLINK; + break; + } + + if (!skip_processing) { + struct bridge_vlan_info *tmp_last = vinfo_last; + + /* br_process_vlan_info may overwrite vinfo_last */ + err = br_process_vlan_info(br, p, cmdmap, vinfo, &tmp_last, + &changed, extack); + + /* notify first if anything changed */ + if (changed) + br_ifinfo_notify(cmdmap, br, p); + + if (err) + return err; + } + + /* deal with options */ + if (cmd == RTM_NEWVLAN) { + struct net_bridge_vlan *range_start, *range_end; + + if (vinfo_last) { + range_start = br_vlan_find(vg, vinfo_last->vid); + range_end = br_vlan_find(vg, vinfo->vid); + } else { + range_start = br_vlan_find(vg, vinfo->vid); + range_end = range_start; + } + + err = br_vlan_process_options(br, p, range_start, range_end, + tb, extack); + } + + return err; +} + +static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct br_vlan_msg *bvm; + struct net_device *dev; + struct nlattr *attr; + int err, vlans = 0; + int rem; + + /* this should validate the header and check for remaining bytes */ + err = nlmsg_parse(nlh, sizeof(*bvm), NULL, BRIDGE_VLANDB_MAX, NULL, + extack); + if (err < 0) + return err; + + bvm = nlmsg_data(nlh); + dev = __dev_get_by_index(net, bvm->ifindex); + if (!dev) + return -ENODEV; + + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) { + NL_SET_ERR_MSG_MOD(extack, "The device is not a valid bridge or bridge port"); + return -EINVAL; + } + + nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) { + if (nla_type(attr) != BRIDGE_VLANDB_ENTRY) + continue; + + vlans++; + err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type, + extack); + if (err) + break; + } + if (!vlans) { + NL_SET_ERR_MSG_MOD(extack, "No vlans found to process"); + err = -EINVAL; + } + + return err; +} + +void br_vlan_rtnl_init(void) +{ + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETVLAN, NULL, + br_vlan_rtm_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWVLAN, + br_vlan_rtm_process, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELVLAN, + br_vlan_rtm_process, NULL, 0); +} + +void br_vlan_rtnl_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETVLAN); + rtnl_unregister(PF_BRIDGE, RTM_NEWVLAN); + rtnl_unregister(PF_BRIDGE, RTM_DELVLAN); +} diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c new file mode 100644 index 000000000..b4add9ea8 --- /dev/null +++ b/net/bridge/br_vlan_options.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@cumulusnetworks.com> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/ip_tunnels.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static bool __vlan_tun_put(struct sk_buff *skb, const struct net_bridge_vlan *v) +{ + __be32 tid = tunnel_id_to_key32(v->tinfo.tunnel_id); + struct nlattr *nest; + + if (!v->tinfo.tunnel_dst) + return true; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO); + if (!nest) + return false; + if (nla_put_u32(skb, BRIDGE_VLANDB_TINFO_ID, be32_to_cpu(tid))) { + nla_nest_cancel(skb, nest); + return false; + } + nla_nest_end(skb, nest); + + return true; +} + +static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return 
(!v_curr->tinfo.tunnel_dst && !range_end->tinfo.tunnel_dst) || + vlan_tunid_inrange(v_curr, range_end); +} + +/* check if the options' state of v_curr allow it to enter the range */ +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return v_curr->state == range_end->state && + __vlan_tun_can_enter_range(v_curr, range_end); +} + +bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v) +{ + return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, + br_vlan_get_state(v)) && + __vlan_tun_put(skb, v); +} + +size_t br_vlan_opts_nl_size(void) +{ + return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */ + + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */ + + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */ +} + +static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, + u8 state, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct net_bridge *br; + + ASSERT_RTNL(); + + if (state > BR_STATE_BLOCKING) { + NL_SET_ERR_MSG_MOD(extack, "Invalid vlan state"); + return -EINVAL; + } + + if (br_vlan_is_brentry(v)) + br = v->br; + else + br = v->port->br; + + if (br->stp_enabled == BR_KERNEL_STP) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify vlan state when using kernel STP"); + return -EBUSY; + } + + if (v->state == state) + return 0; + + if (v->vid == br_get_pvid(vg)) + br_vlan_set_pvid_state(vg, state); + + br_vlan_set_state(v, state); + *changed = true; + + return 0; +} + +static const struct nla_policy br_vlandb_tinfo_pol[BRIDGE_VLANDB_TINFO_MAX + 1] = { + [BRIDGE_VLANDB_TINFO_ID] = { .type = NLA_U32 }, + [BRIDGE_VLANDB_TINFO_CMD] = { .type = NLA_U32 }, +}; + +static int br_vlan_modify_tunnel(const struct net_bridge_port *p, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct nlattr *tun_tb[BRIDGE_VLANDB_TINFO_MAX + 1], *attr; + struct bridge_vlan_info *vinfo; + u32 tun_id = 0; + int cmd, err; + + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify tunnel mapping of non-port vlans"); + return -EINVAL; + } + if (!(p->flags & BR_VLAN_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't have tunnel flag set"); + return -EINVAL; + } + + attr = tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]; + err = nla_parse_nested(tun_tb, BRIDGE_VLANDB_TINFO_MAX, attr, + br_vlandb_tinfo_pol, extack); + if (err) + return err; + + if (!tun_tb[BRIDGE_VLANDB_TINFO_CMD]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel command attribute"); + return -ENOENT; + } + cmd = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_CMD]); + switch (cmd) { + case RTM_SETLINK: + if (!tun_tb[BRIDGE_VLANDB_TINFO_ID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel id attribute"); + return -ENOENT; + } + /* when working on vlan ranges this is the starting tunnel id */ + tun_id = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_ID]); + /* vlan info attr is guaranteed by br_vlan_rtm_process_one */ + vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]); + /* tunnel ids are mapped to each vlan in increasing order, + * the starting vlan is in BRIDGE_VLANDB_ENTRY_INFO and v is the + * current vlan, so we compute: tun_id + v - vinfo->vid + */ + tun_id += v->vid - vinfo->vid; + break; + case RTM_DELLINK: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel command"); + return -EINVAL; + } + + return br_vlan_tunnel_info(p, cmd, v->vid, tun_id, changed); +} + +static int br_vlan_process_one_opts(const struct net_bridge *br, + const struct net_bridge_port *p, + struct 
net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + int err; + + *changed = false; + if (tb[BRIDGE_VLANDB_ENTRY_STATE]) { + u8 state = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_STATE]); + + err = br_vlan_modify_state(vg, v, state, changed, extack); + if (err) + return err; + } + if (tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]) { + err = br_vlan_modify_tunnel(p, v, tb, changed, extack); + if (err) + return err; + } + + return 0; +} + +int br_vlan_process_options(const struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_vlan *range_start, + struct net_bridge_vlan *range_end, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL; + struct net_bridge_vlan_group *vg; + int vid, err = 0; + u16 pvid; + + if (p) + vg = nbp_vlan_group(p); + else + vg = br_vlan_group(br); + + if (!range_start || !br_vlan_should_use(range_start)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan range start doesn't exist, can't process options"); + return -ENOENT; + } + if (!range_end || !br_vlan_should_use(range_end)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan range end doesn't exist, can't process options"); + return -ENOENT; + } + + pvid = br_get_pvid(vg); + for (vid = range_start->vid; vid <= range_end->vid; vid++) { + bool changed = false; + + v = br_vlan_find(vg, vid); + if (!v || !br_vlan_should_use(v)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process options"); + err = -ENOENT; + break; + } + + err = br_vlan_process_one_opts(br, p, vg, v, tb, &changed, + extack); + if (err) + break; + + if (changed) { + /* vlan options changed, check for range */ + if (!curr_start) { + curr_start = v; + curr_end = v; + continue; + } + + if (v->vid == pvid || + !br_vlan_can_enter_range(v, curr_end)) { + br_vlan_notify(br, p, curr_start->vid, + curr_end->vid, RTM_NEWVLAN); + curr_start = v; + } + curr_end = v; + } else { + /* nothing changed and nothing to notify yet */ + if (!curr_start) + continue; + + br_vlan_notify(br, p, curr_start->vid, curr_end->vid, + RTM_NEWVLAN); + curr_start = NULL; + curr_end = NULL; + } + } + if (curr_start) + br_vlan_notify(br, p, curr_start->vid, curr_end->vid, + RTM_NEWVLAN); + + return err; +} diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c new file mode 100644 index 000000000..debe16720 --- /dev/null +++ b/net/bridge/br_vlan_tunnel.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Bridge per vlan tunnel port dst_metadata handling code + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/switchdev.h> +#include <net/dst_metadata.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct net_bridge_vlan *vle = ptr; + __be64 tunid = *(__be64 *)arg->key; + + return vle->tinfo.tunnel_id != tunid; +} + +static const struct rhashtable_params br_vlan_tunnel_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, tnode), + .key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id), + .key_len = sizeof(__be64), + .nelem_hint = 3, + .obj_cmpfn = br_vlan_tunid_cmp, + .automatic_shrinking = true, +}; + +static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, + u64 tunnel_id) +{ + return 
rhashtable_lookup_fast(tbl, &tunnel_id, + br_vlan_tunnel_rht_params); +} + +static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan) +{ + struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst); + + WRITE_ONCE(vlan->tinfo.tunnel_id, 0); + RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL); + dst_release(&tdst->dst); +} + +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ + if (!rcu_access_pointer(vlan->tinfo.tunnel_dst)) + return; + rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + vlan_tunnel_info_release(vlan); +} + +static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan, u32 tun_id) +{ + struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); + __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + int err; + + if (metadata) + return -EEXIST; + + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, + key, 0); + if (!metadata) + return -EINVAL; + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; + rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata); + WRITE_ONCE(vlan->tinfo.tunnel_id, key); + + err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + if (err) + goto out; + + return 0; +out: + vlan_tunnel_info_release(vlan); + + return err; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 tun_id) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + vlan = br_vlan_find(vg, vid); + if (!vlan) + return -EINVAL; + + return __vlan_tunnel_info_add(vg, vlan, tun_id); +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. 
+ */ +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + vlan_tunnel_info_del(vg, v); + + return 0; +} + +static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vlan, *tmp; + + list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) + vlan_tunnel_info_del(vg, vlan); +} + +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + __vlan_tunnel_info_flush(vg); +} + +int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params); +} + +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg) +{ + rhashtable_destroy(&vg->tunnel_hash); +} + +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + struct ip_tunnel_info *tinfo = skb_tunnel_info(skb); + struct net_bridge_vlan *vlan; + + if (!vg || !tinfo) + return 0; + + /* if already tagged, ignore */ + if (skb_vlan_tagged(skb)) + return 0; + + /* lookup vid, given tunnel id */ + vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id); + if (!vlan) + return 0; + + skb_dst_drop(skb); + + __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid); + + return 0; +} + +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan) +{ + struct metadata_dst *tunnel_dst; + __be64 tunnel_id; + int err; + + if (!vlan) + return 0; + + tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id); + if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb))) + return 0; + + skb_dst_drop(skb); + err = skb_vlan_pop(skb); + if (err) + return err; + + tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); + if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) + skb_dst_set(skb, &tunnel_dst->dst); + + return 0; +} diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig new file mode 100644 index 000000000..5040fe43f --- /dev/null +++ b/net/bridge/netfilter/Kconfig @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Bridge netfilter configuration +# +# +menuconfig NF_TABLES_BRIDGE + depends on BRIDGE && NETFILTER && NF_TABLES + select NETFILTER_FAMILY_BRIDGE + tristate "Ethernet Bridge nf_tables support" + +if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + help + Add support for bridge dedicated meta key. + +config NFT_BRIDGE_REJECT + tristate "Netfilter nf_tables bridge reject support" + depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6 + help + Add support to reject packets. + +config NF_LOG_BRIDGE + tristate "Bridge packet logging" + select NF_LOG_COMMON + +endif # NF_TABLES_BRIDGE + +config NF_CONNTRACK_BRIDGE + tristate "IPv4/IPV6 bridge connection tracking support" + depends on NF_CONNTRACK + default n + help + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. This is used to enhance packet filtering via + stateful policies. Enable this if you want native tracking from + the bridge. This provides a replacement for the `br_netfilter' + infrastructure. + + To compile it as a module, choose M here. If unsure, say N. 
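The per-VLAN tunnel helpers in br_vlan_tunnel.c above bind an 802.1Q vid to a tunnel key on a bridge port and must run under RTNL. A minimal caller sketch, assuming it sits in bridge code that already includes br_private.h and br_private_tunnel.h; the function name and the vid/tunnel-id values (100, 1000) are illustrative only, not part of the bridge code:

/* Hypothetical illustration only: bind vid 100 to tunnel id 1000 on a
 * bridge port, then remove the binding again.  Uses
 * nbp_vlan_tunnel_info_add() and nbp_vlan_tunnel_info_delete() from
 * br_vlan_tunnel.c above and must be called with rtnl_lock() held,
 * as those helpers assert.
 */
static int example_bind_vid_to_tunnel(const struct net_bridge_port *p)
{
	int err;

	err = nbp_vlan_tunnel_info_add(p, 100, 1000);
	if (err)	/* e.g. -EINVAL when vid 100 is not configured on the port */
		return err;

	/* egress frames tagged with vid 100 now pick up the tunnel dst metadata */

	return nbp_vlan_tunnel_info_delete(p, 100);
}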
+ +menuconfig BRIDGE_NF_EBTABLES + tristate "Ethernet Bridge tables (ebtables) support" + depends on BRIDGE && NETFILTER && NETFILTER_XTABLES + select NETFILTER_FAMILY_BRIDGE + help + ebtables is a general, extensible frame/packet identification + framework. Say 'Y' or 'M' here if you want to do Ethernet + filtering/NAT/brouting on the Ethernet bridge. + +if BRIDGE_NF_EBTABLES + +# +# tables +# +config BRIDGE_EBT_BROUTE + tristate "ebt: broute table support" + help + The ebtables broute table is used to define rules that decide between + bridging and routing frames, giving Linux the functionality of a + brouter. See the man page for ebtables(8) and examples on the ebtables + website. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_T_FILTER + tristate "ebt: filter table support" + help + The ebtables filter table is used to define frame filtering rules at + local input, forwarding and local output. See the man page for + ebtables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_T_NAT + tristate "ebt: nat table support" + help + The ebtables nat table is used to define rules that alter the MAC + source address (MAC SNAT) or the MAC destination address (MAC DNAT). + See the man page for ebtables(8). + + To compile it as a module, choose M here. If unsure, say N. +# +# matches +# +config BRIDGE_EBT_802_3 + tristate "ebt: 802.3 filter support" + help + This option adds matching support for 802.3 Ethernet frames. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_AMONG + tristate "ebt: among filter support" + help + This option adds the among match, which allows matching the MAC source + and/or destination address on a list of addresses. Optionally, + MAC/IP address pairs can be matched, f.e. for anti-spoofing rules. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_ARP + tristate "ebt: ARP filter support" + help + This option adds the ARP match, which allows ARP and RARP header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_IP + tristate "ebt: IP filter support" + help + This option adds the IP match, which allows basic IP header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_IP6 + tristate "ebt: IP6 filter support" + depends on BRIDGE_NF_EBTABLES && IPV6 + help + This option adds the IP6 match, which allows basic IPV6 header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_LIMIT + tristate "ebt: limit match support" + help + This option adds the limit match, which allows you to control + the rate at which a rule can be matched. This match is the + equivalent of the iptables limit match. + + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.rst>. If unsure, say `N'. + +config BRIDGE_EBT_MARK + tristate "ebt: mark filter support" + help + This option adds the mark match, which allows matching frames based on + the 'nfmark' value in the frame. This can be set by the mark target. + This value is the same as the one used in the iptables mark match and + target. + + To compile it as a module, choose M here. If unsure, say N. 
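Each match option in this file compiles into a small module built around the same xt_match registration pattern; the concrete instances appear further down (ebt_802_3.c, ebt_arp.c, ebt_ip.c and friends). A stripped-down sketch of that pattern — example_mt, example_mt_check and struct example_mt_info are placeholder names, not real kernel symbols:

#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>

struct example_mt_info {
	__u32 flags;			/* data handed in from userspace */
};

/* decide whether the frame matches; only reads the skb */
static bool example_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct example_mt_info *info = par->matchinfo;

	return info->flags != 0;	/* placeholder predicate */
}

/* validate the userspace-supplied data before the rule is accepted */
static int example_mt_check(const struct xt_mtchk_param *par)
{
	return 0;
}

static struct xt_match example_mt_reg __read_mostly = {
	.name		= "example",
	.revision	= 0,
	.family		= NFPROTO_BRIDGE,
	.match		= example_mt,
	.checkentry	= example_mt_check,
	.matchsize	= sizeof(struct example_mt_info),
	.me		= THIS_MODULE,
};

static int __init example_mt_init(void)
{
	return xt_register_match(&example_mt_reg);
}

static void __exit example_mt_fini(void)
{
	xt_unregister_match(&example_mt_reg);
}

module_init(example_mt_init);
module_exit(example_mt_fini);
MODULE_DESCRIPTION("Ebtables: illustrative match skeleton");
MODULE_LICENSE("GPL");

The per-rule data arrives in par->matchinfo, sized by .matchsize and checked by .checkentry before .match is ever called on a frame — exactly the flow the real modules below follow.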
+ +config BRIDGE_EBT_PKTTYPE + tristate "ebt: packet type filter support" + help + This option adds the packet type match, which allows matching on the + type of packet based on its Ethernet "class" (as determined by + the generic networking code): broadcast, multicast, + for this host alone or for another host. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_STP + tristate "ebt: STP filter support" + help + This option adds the Spanning Tree Protocol match, which + allows STP header field filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_VLAN + tristate "ebt: 802.1Q VLAN filter support" + help + This option adds the 802.1Q vlan match, which allows the filtering of + 802.1Q vlan fields. + + To compile it as a module, choose M here. If unsure, say N. +# +# targets +# +config BRIDGE_EBT_ARPREPLY + tristate "ebt: arp reply target support" + depends on BRIDGE_NF_EBTABLES && INET + help + This option adds the arp reply target, which allows + automatically sending arp replies to arp requests. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_DNAT + tristate "ebt: dnat target support" + help + This option adds the MAC DNAT target, which allows altering the MAC + destination address of frames. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_MARK_T + tristate "ebt: mark target support" + help + This option adds the mark target, which allows marking frames by + setting the 'nfmark' value in the frame. + This value is the same as the one used in the iptables mark match and + target. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_REDIRECT + tristate "ebt: redirect target support" + help + This option adds the MAC redirect target, which allows altering the MAC + destination address of a frame to that of the device it arrived on. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_SNAT + tristate "ebt: snat target support" + help + This option adds the MAC SNAT target, which allows altering the MAC + source address of frames. + + To compile it as a module, choose M here. If unsure, say N. +# +# watchers +# +config BRIDGE_EBT_LOG + tristate "ebt: log support" + help + This option adds the log watcher, that you can use in any rule + in any ebtables table. It records info about the frame header + to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_NFLOG + tristate "ebt: nflog support" + help + This option enables the nflog watcher, which allows to LOG + messages through the netfilter logging API, which can use + either the old LOG target, the old ULOG target or nfnetlink_log + as backend. + + This option adds the nflog watcher, that you can use in any rule + in any ebtables table. + + To compile it as a module, choose M here. If unsure, say N. + +endif # BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile new file mode 100644 index 000000000..8e2c5759d --- /dev/null +++ b/net/bridge/netfilter/Makefile @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the netfilter modules for Link Layer filtering on a bridge. 
+# + +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o +obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o + +# connection tracking +obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o + +# packet logging +obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o + +obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o + +# tables +obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o +obj-$(CONFIG_BRIDGE_EBT_T_FILTER) += ebtable_filter.o +obj-$(CONFIG_BRIDGE_EBT_T_NAT) += ebtable_nat.o + +#matches +obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o +obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o +obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o +obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o +obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o +obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o +obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o +obj-$(CONFIG_BRIDGE_EBT_STP) += ebt_stp.o +obj-$(CONFIG_BRIDGE_EBT_VLAN) += ebt_vlan.o + +# targets +obj-$(CONFIG_BRIDGE_EBT_ARPREPLY) += ebt_arpreply.o +obj-$(CONFIG_BRIDGE_EBT_MARK_T) += ebt_mark.o +obj-$(CONFIG_BRIDGE_EBT_DNAT) += ebt_dnat.o +obj-$(CONFIG_BRIDGE_EBT_REDIRECT) += ebt_redirect.o +obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o + +# watchers +obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o +obj-$(CONFIG_BRIDGE_EBT_NFLOG) += ebt_nflog.o diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c new file mode 100644 index 000000000..68c2519bd --- /dev/null +++ b/net/bridge/netfilter/ebt_802_3.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 802_3 + * + * Author: + * Chris Vitale csv@bluetail.com + * + * May 2003 + * + */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/skbuff.h> +#include <uapi/linux/netfilter_bridge/ebt_802_3.h> + +static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) +{ + return (struct ebt_802_3_hdr *)skb_mac_header(skb); +} + +static bool +ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_802_3_info *info = par->matchinfo; + const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); + __be16 type = hdr->llc.ui.ctrl & IS_UI ? 
hdr->llc.ui.type : hdr->llc.ni.type; + + if (info->bitmask & EBT_802_3_SAP) { + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap)) + return false; + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap)) + return false; + } + + if (info->bitmask & EBT_802_3_TYPE) { + if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) + return false; + if (NF_INVF(info, EBT_802_3_TYPE, info->type != type)) + return false; + } + + return true; +} + +static int ebt_802_3_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_802_3_info *info = par->matchinfo; + + if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) + return -EINVAL; + + return 0; +} + +static struct xt_match ebt_802_3_mt_reg __read_mostly = { + .name = "802_3", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_802_3_mt, + .checkentry = ebt_802_3_mt_check, + .matchsize = sizeof(struct ebt_802_3_info), + .me = THIS_MODULE, +}; + +static int __init ebt_802_3_init(void) +{ + return xt_register_match(&ebt_802_3_mt_reg); +} + +static void __exit ebt_802_3_fini(void) +{ + xt_unregister_match(&ebt_802_3_mt_reg); +} + +module_init(ebt_802_3_init); +module_exit(ebt_802_3_fini); +MODULE_DESCRIPTION("Ebtables: DSAP/SSAP field and SNAP type matching"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c new file mode 100644 index 000000000..96f7243b6 --- /dev/null +++ b/net/bridge/netfilter/ebt_among.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_among + * + * Authors: + * Grzegorz Borowiak <grzes@gnu.univ.gda.pl> + * + * August, 2003 + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/ip.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_among.h> + +static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, __be32 ip) +{ + /* You may be puzzled as to how this code works. + * Some tricks were used, refer to + * include/linux/netfilter_bridge/ebt_among.h + * as there you can find a solution of this mystery. 
+ */ + const struct ebt_mac_wormhash_tuple *p; + int start, limit, i; + uint32_t cmp[2] = { 0, 0 }; + int key = ((const unsigned char *)mac)[5]; + + ether_addr_copy(((char *) cmp) + 2, mac); + start = wh->table[key]; + limit = wh->table[key + 1]; + if (ip) { + for (i = start; i < limit; i++) { + p = &wh->pool[i]; + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0 || p->ip == ip) + return true; + } + } else { + for (i = start; i < limit; i++) { + p = &wh->pool[i]; + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0) + return true; + } + } + return false; +} + +static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash + *wh) +{ + int i; + + for (i = 0; i < 256; i++) { + if (wh->table[i] > wh->table[i + 1]) + return -0x100 - i; + if (wh->table[i] < 0) + return -0x200 - i; + if (wh->table[i] > wh->poolsize) + return -0x300 - i; + } + if (wh->table[256] > wh->poolsize) + return -0xc00; + return 0; +} + +static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) +{ + if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { + const struct iphdr *ih; + struct iphdr _iph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return -1; + *addr = ih->daddr; + } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(__be32) || + ah->ar_hln != ETH_ALEN) + return -1; + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + 2 * ETH_ALEN + sizeof(__be32), + sizeof(__be32), &buf); + if (bp == NULL) + return -1; + *addr = *bp; + } + return 0; +} + +static int get_ip_src(const struct sk_buff *skb, __be32 *addr) +{ + if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { + const struct iphdr *ih; + struct iphdr _iph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return -1; + *addr = ih->saddr; + } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(__be32) || + ah->ar_hln != ETH_ALEN) + return -1; + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + ETH_ALEN, sizeof(__be32), &buf); + if (bp == NULL) + return -1; + *addr = *bp; + } + return 0; +} + +static bool +ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_among_info *info = par->matchinfo; + const char *dmac, *smac; + const struct ebt_mac_wormhash *wh_dst, *wh_src; + __be32 dip = 0, sip = 0; + + wh_dst = ebt_among_wh_dst(info); + wh_src = ebt_among_wh_src(info); + + if (wh_src) { + smac = eth_hdr(skb)->h_source; + if (get_ip_src(skb, &sip)) + return false; + if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { + /* we match only if it contains */ + if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) + return false; + } else { + /* we match only if it DOES NOT contain */ + if (ebt_mac_wormhash_contains(wh_src, smac, sip)) + return false; + } + } + + if (wh_dst) { + dmac = eth_hdr(skb)->h_dest; + if (get_ip_dst(skb, &dip)) + return false; + if (!(info->bitmask & EBT_AMONG_DST_NEG)) { + /* we match only if it contains */ + if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) + return false; + } else { + /* we match only if it DOES NOT contain */ + if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) + return false; + } + } + + return true; +} + +static bool 
poolsize_invalid(const struct ebt_mac_wormhash *w) +{ + return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple)); +} + +static bool wormhash_offset_invalid(int off, unsigned int len) +{ + if (off == 0) /* not present */ + return false; + + if (off < (int)sizeof(struct ebt_among_info) || + off % __alignof__(struct ebt_mac_wormhash)) + return true; + + off += sizeof(struct ebt_mac_wormhash); + + return off > len; +} + +static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b) +{ + if (a == 0) + a = sizeof(struct ebt_among_info); + + return ebt_mac_wormhash_size(wh) + a == b; +} + +static int ebt_among_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_among_info *info = par->matchinfo; + const struct ebt_entry_match *em = + container_of(par->matchinfo, const struct ebt_entry_match, data); + unsigned int expected_length = sizeof(struct ebt_among_info); + const struct ebt_mac_wormhash *wh_dst, *wh_src; + int err; + + if (expected_length > em->match_size) + return -EINVAL; + + if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) || + wormhash_offset_invalid(info->wh_src_ofs, em->match_size)) + return -EINVAL; + + wh_dst = ebt_among_wh_dst(info); + if (poolsize_invalid(wh_dst)) + return -EINVAL; + + expected_length += ebt_mac_wormhash_size(wh_dst); + if (expected_length > em->match_size) + return -EINVAL; + + wh_src = ebt_among_wh_src(info); + if (poolsize_invalid(wh_src)) + return -EINVAL; + + if (info->wh_src_ofs < info->wh_dst_ofs) { + if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs)) + return -EINVAL; + } else { + if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs)) + return -EINVAL; + } + + expected_length += ebt_mac_wormhash_size(wh_src); + + if (em->match_size != EBT_ALIGN(expected_length)) { + pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); + return -EINVAL; + } + if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { + pr_err_ratelimited("dst integrity fail: %x\n", -err); + return -EINVAL; + } + if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { + pr_err_ratelimited("src integrity fail: %x\n", -err); + return -EINVAL; + } + return 0; +} + +static struct xt_match ebt_among_mt_reg __read_mostly = { + .name = "among", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_among_mt, + .checkentry = ebt_among_mt_check, + .matchsize = -1, /* special case */ + .me = THIS_MODULE, +}; + +static int __init ebt_among_init(void) +{ + return xt_register_match(&ebt_among_mt_reg); +} + +static void __exit ebt_among_fini(void) +{ + xt_unregister_match(&ebt_among_mt_reg); +} + +module_init(ebt_among_init); +module_exit(ebt_among_fini); +MODULE_DESCRIPTION("Ebtables: Combined MAC/IP address list matching"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c new file mode 100644 index 000000000..0707cc00f --- /dev/null +++ b/net/bridge/netfilter/ebt_arp.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_arp + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * Tim Gardner <timg@tpi.com> + * + * April, 2002 + * + */ +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> + +static bool +ebt_arp_mt(const struct sk_buff *skb, struct 
xt_action_param *par) +{ + const struct ebt_arp_info *info = par->matchinfo; + const struct arphdr *ah; + struct arphdr _arph; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) + return false; + if ((info->bitmask & EBT_ARP_OPCODE) && + NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op)) + return false; + if ((info->bitmask & EBT_ARP_HTYPE) && + NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd)) + return false; + if ((info->bitmask & EBT_ARP_PTYPE) && + NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro)) + return false; + + if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { + const __be32 *sap, *dap; + __be32 saddr, daddr; + + if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) + return false; + sap = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln, sizeof(saddr), + &saddr); + if (sap == NULL) + return false; + dap = skb_header_pointer(skb, sizeof(struct arphdr) + + 2*ah->ar_hln+sizeof(saddr), + sizeof(daddr), &daddr); + if (dap == NULL) + return false; + if ((info->bitmask & EBT_ARP_SRC_IP) && + NF_INVF(info, EBT_ARP_SRC_IP, + info->saddr != (*sap & info->smsk))) + return false; + if ((info->bitmask & EBT_ARP_DST_IP) && + NF_INVF(info, EBT_ARP_DST_IP, + info->daddr != (*dap & info->dmsk))) + return false; + if ((info->bitmask & EBT_ARP_GRAT) && + NF_INVF(info, EBT_ARP_GRAT, *dap != *sap)) + return false; + } + + if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { + const unsigned char *mp; + unsigned char _mac[ETH_ALEN]; + + if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) + return false; + if (info->bitmask & EBT_ARP_SRC_MAC) { + mp = skb_header_pointer(skb, sizeof(struct arphdr), + sizeof(_mac), &_mac); + if (mp == NULL) + return false; + if (NF_INVF(info, EBT_ARP_SRC_MAC, + !ether_addr_equal_masked(mp, info->smaddr, + info->smmsk))) + return false; + } + + if (info->bitmask & EBT_ARP_DST_MAC) { + mp = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln + ah->ar_pln, + sizeof(_mac), &_mac); + if (mp == NULL) + return false; + if (NF_INVF(info, EBT_ARP_DST_MAC, + !ether_addr_equal_masked(mp, info->dmaddr, + info->dmmsk))) + return false; + } + } + + return true; +} + +static int ebt_arp_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_arp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; + + if ((e->ethproto != htons(ETH_P_ARP) && + e->ethproto != htons(ETH_P_RARP)) || + e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) + return -EINVAL; + return 0; +} + +static struct xt_match ebt_arp_mt_reg __read_mostly = { + .name = "arp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_arp_mt, + .checkentry = ebt_arp_mt_check, + .matchsize = sizeof(struct ebt_arp_info), + .me = THIS_MODULE, +}; + +static int __init ebt_arp_init(void) +{ + return xt_register_match(&ebt_arp_mt_reg); +} + +static void __exit ebt_arp_fini(void) +{ + xt_unregister_match(&ebt_arp_mt_reg); +} + +module_init(ebt_arp_init); +module_exit(ebt_arp_fini); +MODULE_DESCRIPTION("Ebtables: ARP protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c new file mode 100644 index 000000000..d9e77e250 --- /dev/null +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_arpreply + * + * Authors: + * Grzegorz Borowiak <grzes@gnu.univ.gda.pl> + * Bart De Schuymer 
<bdschuym@pandora.be> + * + * August, 2003 + * + */ +#include <linux/if_arp.h> +#include <net/arp.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> + +static unsigned int +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_arpreply_info *info = par->targinfo; + const __be32 *siptr, *diptr; + __be32 _sip, _dip; + const struct arphdr *ap; + struct arphdr _ah; + const unsigned char *shp; + unsigned char _sha[ETH_ALEN]; + + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + if (ap == NULL) + return EBT_DROP; + + if (ap->ar_op != htons(ARPOP_REQUEST) || + ap->ar_hln != ETH_ALEN || + ap->ar_pro != htons(ETH_P_IP) || + ap->ar_pln != 4) + return EBT_CONTINUE; + + shp = skb_header_pointer(skb, sizeof(_ah), ETH_ALEN, &_sha); + if (shp == NULL) + return EBT_DROP; + + siptr = skb_header_pointer(skb, sizeof(_ah) + ETH_ALEN, + sizeof(_sip), &_sip); + if (siptr == NULL) + return EBT_DROP; + + diptr = skb_header_pointer(skb, + sizeof(_ah) + 2 * ETH_ALEN + sizeof(_sip), + sizeof(_dip), &_dip); + if (diptr == NULL) + return EBT_DROP; + + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, + (struct net_device *)xt_in(par), + *diptr, shp, info->mac, shp); + + return info->target; +} + +static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) +{ + const struct ebt_arpreply_info *info = par->targinfo; + const struct ebt_entry *e = par->entryinfo; + + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + if (e->ethproto != htons(ETH_P_ARP) || + e->invflags & EBT_IPROTO) + return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + + return 0; +} + +static struct xt_target ebt_arpreply_tg_reg __read_mostly = { + .name = "arpreply", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), + .target = ebt_arpreply_tg, + .checkentry = ebt_arpreply_tg_check, + .targetsize = sizeof(struct ebt_arpreply_info), + .me = THIS_MODULE, +}; + +static int __init ebt_arpreply_init(void) +{ + return xt_register_target(&ebt_arpreply_tg_reg); +} + +static void __exit ebt_arpreply_fini(void) +{ + xt_unregister_target(&ebt_arpreply_tg_reg); +} + +module_init(ebt_arpreply_init); +module_exit(ebt_arpreply_fini); +MODULE_DESCRIPTION("Ebtables: ARP reply target"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c new file mode 100644 index 000000000..3fda71a85 --- /dev/null +++ b/net/bridge/netfilter/ebt_dnat.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_dnat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * June, 2002 + * + */ +#include <linux/module.h> +#include <net/sock.h> +#include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> + +static unsigned int +ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_nat_info *info = par->targinfo; + + if (skb_ensure_writable(skb, 0)) + return EBT_DROP; + + ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); + + if (is_multicast_ether_addr(info->mac)) { + if (is_broadcast_ether_addr(info->mac)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: + dev = 
xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; + + if (ether_addr_equal(info->mac, dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + else + skb->pkt_type = PACKET_OTHERHOST; + } + + return info->target; +} + +static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) +{ + const struct ebt_nat_info *info = par->targinfo; + unsigned int hook_mask; + + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; +} + +static struct xt_target ebt_dnat_tg_reg __read_mostly = { + .name = "dnat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), + .target = ebt_dnat_tg, + .checkentry = ebt_dnat_tg_check, + .targetsize = sizeof(struct ebt_nat_info), + .me = THIS_MODULE, +}; + +static int __init ebt_dnat_init(void) +{ + return xt_register_target(&ebt_dnat_tg_reg); +} + +static void __exit ebt_dnat_fini(void) +{ + xt_unregister_target(&ebt_dnat_tg_reg); +} + +module_init(ebt_dnat_init); +module_exit(ebt_dnat_fini); +MODULE_DESCRIPTION("Ebtables: Destination MAC address translation"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c new file mode 100644 index 000000000..df372496c --- /dev/null +++ b/net/bridge/netfilter/ebt_ip.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_ip + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + * Changes: + * added ip-sport and ip-dport + * Innominate Security Technologies AG <mhopf@innominate.com> + * September, 2002 + */ +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/in.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> + +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; + struct { + u8 type; + } igmphdr; +}; + +static bool +ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_ip_info *info = par->matchinfo; + const struct iphdr *ih; + struct iphdr _iph; + const union pkthdr *pptr; + union pkthdr _pkthdr; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return false; + if ((info->bitmask & EBT_IP_TOS) && + NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos)) + return false; + if ((info->bitmask & EBT_IP_SOURCE) && + NF_INVF(info, EBT_IP_SOURCE, + (ih->saddr & info->smsk) != info->saddr)) + return false; + if ((info->bitmask & EBT_IP_DEST) && + NF_INVF(info, EBT_IP_DEST, + (ih->daddr & info->dmsk) != info->daddr)) + return false; + if (info->bitmask & EBT_IP_PROTO) { + if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) + return false; + if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT | + EBT_IP_ICMP | EBT_IP_IGMP))) + return true; + if (ntohs(ih->frag_off) & IP_OFFSET) + return false; + + /* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. 
*/ + pptr = skb_header_pointer(skb, ih->ihl*4, + sizeof(_pkthdr), &_pkthdr); + if (pptr == NULL) + return false; + if (info->bitmask & EBT_IP_DPORT) { + u32 dst = ntohs(pptr->tcpudphdr.dst); + if (NF_INVF(info, EBT_IP_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) + return false; + } + if (info->bitmask & EBT_IP_SPORT) { + u32 src = ntohs(pptr->tcpudphdr.src); + if (NF_INVF(info, EBT_IP_SPORT, + src < info->sport[0] || + src > info->sport[1])) + return false; + } + if ((info->bitmask & EBT_IP_ICMP) && + NF_INVF(info, EBT_IP_ICMP, + pptr->icmphdr.type < info->icmp_type[0] || + pptr->icmphdr.type > info->icmp_type[1] || + pptr->icmphdr.code < info->icmp_code[0] || + pptr->icmphdr.code > info->icmp_code[1])) + return false; + if ((info->bitmask & EBT_IP_IGMP) && + NF_INVF(info, EBT_IP_IGMP, + pptr->igmphdr.type < info->igmp_type[0] || + pptr->igmphdr.type > info->igmp_type[1])) + return false; + } + return true; +} + +static int ebt_ip_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_ip_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; + + if (e->ethproto != htons(ETH_P_IP) || + e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { + if (info->invflags & EBT_IP_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return -EINVAL; + } + if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP_ICMP) { + if ((info->invflags & EBT_IP_PROTO) || + info->protocol != IPPROTO_ICMP) + return -EINVAL; + if (info->icmp_type[0] > info->icmp_type[1] || + info->icmp_code[0] > info->icmp_code[1]) + return -EINVAL; + } + if (info->bitmask & EBT_IP_IGMP) { + if ((info->invflags & EBT_IP_PROTO) || + info->protocol != IPPROTO_IGMP) + return -EINVAL; + if (info->igmp_type[0] > info->igmp_type[1]) + return -EINVAL; + } + return 0; +} + +static struct xt_match ebt_ip_mt_reg __read_mostly = { + .name = "ip", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip_mt, + .checkentry = ebt_ip_mt_check, + .matchsize = sizeof(struct ebt_ip_info), + .me = THIS_MODULE, +}; + +static int __init ebt_ip_init(void) +{ + return xt_register_match(&ebt_ip_mt_reg); +} + +static void __exit ebt_ip_fini(void) +{ + xt_unregister_match(&ebt_ip_mt_reg); +} + +module_init(ebt_ip_init); +module_exit(ebt_ip_fini); +MODULE_DESCRIPTION("Ebtables: IPv4 protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c new file mode 100644 index 000000000..f3225bc31 --- /dev/null +++ b/net/bridge/netfilter/ebt_ip6.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_ip6 + * + * Authors: + * Manohar Castelino <manohar.r.castelino@intel.com> + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Jan Engelhardt <jengelh@medozas.de> + * + * Summary: + * This is just a modification of the IPv4 code written by + * Bart De Schuymer <bdschuym@pandora.be> + * with the changes required to support IPv6 + * + * Jan, 2008 + */ +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in.h> +#include <linux/module.h> +#include <net/dsfield.h> +#include <linux/netfilter/x_tables.h> +#include 
<linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> + +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; +}; + +static bool +ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_ip6_info *info = par->matchinfo; + const struct ipv6hdr *ih6; + struct ipv6hdr _ip6h; + const union pkthdr *pptr; + union pkthdr _pkthdr; + + ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); + if (ih6 == NULL) + return false; + if ((info->bitmask & EBT_IP6_TCLASS) && + NF_INVF(info, EBT_IP6_TCLASS, + info->tclass != ipv6_get_dsfield(ih6))) + return false; + if (((info->bitmask & EBT_IP6_SOURCE) && + NF_INVF(info, EBT_IP6_SOURCE, + ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr))) || + ((info->bitmask & EBT_IP6_DEST) && + NF_INVF(info, EBT_IP6_DEST, + ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr)))) + return false; + if (info->bitmask & EBT_IP6_PROTO) { + uint8_t nexthdr = ih6->nexthdr; + __be16 frag_off; + int offset_ph; + + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); + if (offset_ph == -1) + return false; + if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr)) + return false; + if (!(info->bitmask & (EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) + return true; + + /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ + pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), + &_pkthdr); + if (pptr == NULL) + return false; + if (info->bitmask & EBT_IP6_DPORT) { + u16 dst = ntohs(pptr->tcpudphdr.dst); + if (NF_INVF(info, EBT_IP6_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) + return false; + } + if (info->bitmask & EBT_IP6_SPORT) { + u16 src = ntohs(pptr->tcpudphdr.src); + if (NF_INVF(info, EBT_IP6_SPORT, + src < info->sport[0] || + src > info->sport[1])) + return false; + } + if ((info->bitmask & EBT_IP6_ICMP6) && + NF_INVF(info, EBT_IP6_ICMP6, + pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1])) + return false; + } + return true; +} + +static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_entry *e = par->entryinfo; + struct ebt_ip6_info *info = par->matchinfo; + + if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { + if (info->invflags & EBT_IP6_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return -EINVAL; + } + if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_ICMP6) { + if ((info->invflags & EBT_IP6_PROTO) || + info->protocol != IPPROTO_ICMPV6) + return -EINVAL; + if (info->icmpv6_type[0] > info->icmpv6_type[1] || + info->icmpv6_code[0] > info->icmpv6_code[1]) + return -EINVAL; + } + return 0; +} + +static struct xt_match ebt_ip6_mt_reg __read_mostly = { + .name = "ip6", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip6_mt, + .checkentry = ebt_ip6_mt_check, + .matchsize = sizeof(struct ebt_ip6_info), + .me = THIS_MODULE, 
+}; + +static int __init ebt_ip6_init(void) +{ + return xt_register_match(&ebt_ip6_mt_reg); +} + +static void __exit ebt_ip6_fini(void) +{ + xt_unregister_match(&ebt_ip6_mt_reg); +} + +module_init(ebt_ip6_init); +module_exit(ebt_ip6_fini); +MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c new file mode 100644 index 000000000..fa199556e --- /dev/null +++ b/net/bridge/netfilter/ebt_limit.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_limit + * + * Authors: + * Tom Marshall <tommy@home.tig-grr.com> + * + * Mostly copied from netfilter's ipt_limit.c, see that file for + * more explanation + * + * September, 2003 + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> + +static DEFINE_SPINLOCK(limit_lock); + +#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) + +#define _POW2_BELOW2(x) ((x)|((x)>>1)) +#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2)) +#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) +#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) +#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) + +#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) + +static bool +ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ebt_limit_info *info = (void *)par->matchinfo; + unsigned long now = jiffies; + + spin_lock_bh(&limit_lock); + info->credit += (now - xchg(&info->prev, now)) * CREDITS_PER_JIFFY; + if (info->credit > info->credit_cap) + info->credit = info->credit_cap; + + if (info->credit >= info->cost) { + /* We're not limited. */ + info->credit -= info->cost; + spin_unlock_bh(&limit_lock); + return true; + } + + spin_unlock_bh(&limit_lock); + return false; +} + +/* Precision saver. */ +static u_int32_t +user2credits(u_int32_t user) +{ + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + /* Divide first. */ + return (user / EBT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; +} + +static int ebt_limit_mt_check(const struct xt_mtchk_param *par) +{ + struct ebt_limit_info *info = par->matchinfo; + + /* Check for overflow. */ + if (info->burst == 0 || + user2credits(info->avg * info->burst) < user2credits(info->avg)) { + pr_info_ratelimited("overflow, try lower: %u/%u\n", + info->avg, info->burst); + return -EINVAL; + } + + /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */ + info->prev = jiffies; + info->credit = user2credits(info->avg * info->burst); + info->credit_cap = user2credits(info->avg * info->burst); + info->cost = user2credits(info->avg); + return 0; +} + + +#ifdef CONFIG_COMPAT +/* + * no conversion function needed -- + * only avg/burst have meaningful values in userspace. 
+ */ +struct ebt_compat_limit_info { + compat_uint_t avg, burst; + compat_ulong_t prev; + compat_uint_t credit, credit_cap, cost; +}; +#endif + +static struct xt_match ebt_limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_limit_mt, + .checkentry = ebt_limit_mt_check, + .matchsize = sizeof(struct ebt_limit_info), + .usersize = offsetof(struct ebt_limit_info, prev), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct ebt_compat_limit_info), +#endif + .me = THIS_MODULE, +}; + +static int __init ebt_limit_init(void) +{ + return xt_register_match(&ebt_limit_mt_reg); +} + +static void __exit ebt_limit_fini(void) +{ + xt_unregister_match(&ebt_limit_mt_reg); +} + +module_init(ebt_limit_init); +module_exit(ebt_limit_fini); +MODULE_DESCRIPTION("Ebtables: Rate-limit match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c new file mode 100644 index 000000000..e2eea1daa --- /dev/null +++ b/net/bridge/netfilter/ebt_log.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_log + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * Harald Welte <laforge@netfilter.org> + * + * April, 2002 + * + */ +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/if_arp.h> +#include <linux/spinlock.h> +#include <net/netfilter/nf_log.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in6.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> + +static DEFINE_SPINLOCK(ebt_log_lock); + +static int ebt_log_tg_check(const struct xt_tgchk_param *par) +{ + struct ebt_log_info *info = par->targinfo; + + if (info->bitmask & ~EBT_LOG_MASK) + return -EINVAL; + if (info->loglevel >= 8) + return -EINVAL; + info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; + return 0; +} + +struct tcpudphdr { + __be16 src; + __be16 dst; +}; + +struct arppayload { + unsigned char mac_src[ETH_ALEN]; + unsigned char ip_src[4]; + unsigned char mac_dst[ETH_ALEN]; + unsigned char ip_dst[4]; +}; + +static void +print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) +{ + if (protocol == IPPROTO_TCP || + protocol == IPPROTO_UDP || + protocol == IPPROTO_UDPLITE || + protocol == IPPROTO_SCTP || + protocol == IPPROTO_DCCP) { + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + + pptr = skb_header_pointer(skb, offset, + sizeof(_ports), &_ports); + if (pptr == NULL) { + pr_cont(" INCOMPLETE TCP/UDP header"); + return; + } + pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + } +} + +static void +ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct nf_loginfo *loginfo, + const char *prefix) +{ + unsigned int bitmask; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + return; + + spin_lock_bh(&ebt_log_lock); + printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", + '0' + loginfo->u.log.level, prefix, + in ? in->name : "", out ? 
out->name : "", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + + if (loginfo->type == NF_LOG_TYPE_LOG) + bitmask = loginfo->u.log.logflags; + else + bitmask = NF_LOG_DEFAULT_MASK; + + if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == + htons(ETH_P_IP)) { + const struct iphdr *ih; + struct iphdr _iph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + pr_cont(" INCOMPLETE IP header"); + goto out; + } + pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", + &ih->saddr, &ih->daddr, ih->tos, ih->protocol); + print_ports(skb, ih->protocol, ih->ihl*4); + goto out; + } + +#if IS_ENABLED(CONFIG_BRIDGE_EBT_IP6) + if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == + htons(ETH_P_IPV6)) { + const struct ipv6hdr *ih; + struct ipv6hdr _iph; + uint8_t nexthdr; + __be16 frag_off; + int offset_ph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + pr_cont(" INCOMPLETE IPv6 header"); + goto out; + } + pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); + nexthdr = ih->nexthdr; + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off); + if (offset_ph == -1) + goto out; + print_ports(skb, nexthdr, offset_ph); + goto out; + } +#endif + + if ((bitmask & EBT_LOG_ARP) && + ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || + (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { + const struct arphdr *ah; + struct arphdr _arph; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) { + pr_cont(" INCOMPLETE ARP header"); + goto out; + } + pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), + ntohs(ah->ar_op)); + + /* If it's for Ethernet and the lengths are OK, + * then log the ARP payload + */ + if (ah->ar_hrd == htons(1) && + ah->ar_hln == ETH_ALEN && + ah->ar_pln == sizeof(__be32)) { + const struct arppayload *ap; + struct arppayload _arpp; + + ap = skb_header_pointer(skb, sizeof(_arph), + sizeof(_arpp), &_arpp); + if (ap == NULL) { + pr_cont(" INCOMPLETE ARP payload"); + goto out; + } + pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", + ap->mac_src, ap->ip_src, + ap->mac_dst, ap->ip_dst); + } + } +out: + pr_cont("\n"); + spin_unlock_bh(&ebt_log_lock); +} + +static unsigned int +ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_log_info *info = par->targinfo; + struct nf_loginfo li; + struct net *net = xt_net(par); + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = info->loglevel; + li.u.log.logflags = info->bitmask; + + /* Remember that we have to use ebt_log_packet() not to break backward + * compatibility. We cannot use the default bridge packet logger via + * nf_log_packet() with NFT_LOG_TYPE_LOG here. 
--Pablo + */ + if (info->bitmask & EBT_LOG_NFLOG) + nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, "%s", + info->prefix); + else + ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); + return EBT_CONTINUE; +} + +static struct xt_target ebt_log_tg_reg __read_mostly = { + .name = "log", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_log_tg, + .checkentry = ebt_log_tg_check, + .targetsize = sizeof(struct ebt_log_info), + .me = THIS_MODULE, +}; + +static int __init ebt_log_init(void) +{ + return xt_register_target(&ebt_log_tg_reg); +} + +static void __exit ebt_log_fini(void) +{ + xt_unregister_target(&ebt_log_tg_reg); +} + +module_init(ebt_log_init); +module_exit(ebt_log_fini); +MODULE_DESCRIPTION("Ebtables: Packet logging to syslog"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c new file mode 100644 index 000000000..21fd3d3d7 --- /dev/null +++ b/net/bridge/netfilter/ebt_mark.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_mark + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * July, 2002 + * + */ + +/* The mark target can be used in any chain, + * I believe adding a mangle table just for marking is total overkill. + * Marking a frame doesn't really change anything in the frame anyway. + */ + +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_mark_t.h> + +static unsigned int +ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_mark_t_info *info = par->targinfo; + int action = info->target & -16; + + if (action == MARK_SET_VALUE) + skb->mark = info->mark; + else if (action == MARK_OR_VALUE) + skb->mark |= info->mark; + else if (action == MARK_AND_VALUE) + skb->mark &= info->mark; + else + skb->mark ^= info->mark; + + return info->target | ~EBT_VERDICT_BITS; +} + +static int ebt_mark_tg_check(const struct xt_tgchk_param *par) +{ + const struct ebt_mark_t_info *info = par->targinfo; + int tmp; + + tmp = info->target | ~EBT_VERDICT_BITS; + if (BASE_CHAIN && tmp == EBT_RETURN) + return -EINVAL; + if (ebt_invalid_target(tmp)) + return -EINVAL; + tmp = info->target & ~EBT_VERDICT_BITS; + if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && + tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) + return -EINVAL; + return 0; +} +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_t_info { + compat_ulong_t mark; + compat_uint_t target; +}; + +static void mark_tg_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_t_info *user = src; + struct ebt_mark_t_info *kern = dst; + + kern->mark = user->mark; + kern->target = user->target; +} + +static int mark_tg_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_t_info __user *user = dst; + const struct ebt_mark_t_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->target, &user->target)) + return -EFAULT; + return 0; +} +#endif + +static struct xt_target ebt_mark_tg_reg __read_mostly = { + .name = "mark", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_mark_tg, + .checkentry = ebt_mark_tg_check, + .targetsize = sizeof(struct ebt_mark_t_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_t_info), + .compat_from_user = mark_tg_compat_from_user, + .compat_to_user = mark_tg_compat_to_user, +#endif + .me = THIS_MODULE, 
+}; + +static int __init ebt_mark_init(void) +{ + return xt_register_target(&ebt_mark_tg_reg); +} + +static void __exit ebt_mark_fini(void) +{ + xt_unregister_target(&ebt_mark_tg_reg); +} + +module_init(ebt_mark_init); +module_exit(ebt_mark_fini); +MODULE_DESCRIPTION("Ebtables: Packet mark modification"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c new file mode 100644 index 000000000..81fb59dec --- /dev/null +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_mark_m + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * July, 2002 + * + */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_mark_m.h> + +static bool +ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_mark_m_info *info = par->matchinfo; + + if (info->bitmask & EBT_MARK_OR) + return !!(skb->mark & info->mask) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; +} + +static int ebt_mark_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_mark_m_info *info = par->matchinfo; + + if (info->bitmask & ~EBT_MARK_MASK) + return -EINVAL; + if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) + return -EINVAL; + if (!info->bitmask) + return -EINVAL; + return 0; +} + + +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_m_info { + compat_ulong_t mark, mask; + uint8_t invert, bitmask; +}; + +static void mark_mt_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_m_info *user = src; + struct ebt_mark_m_info *kern = dst; + + kern->mark = user->mark; + kern->mask = user->mask; + kern->invert = user->invert; + kern->bitmask = user->bitmask; +} + +static int mark_mt_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_m_info __user *user = dst; + const struct ebt_mark_m_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->mask, &user->mask) || + put_user(kern->invert, &user->invert) || + put_user(kern->bitmask, &user->bitmask)) + return -EFAULT; + return 0; +} +#endif + +static struct xt_match ebt_mark_mt_reg __read_mostly = { + .name = "mark_m", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_mark_mt, + .checkentry = ebt_mark_mt_check, + .matchsize = sizeof(struct ebt_mark_m_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_m_info), + .compat_from_user = mark_mt_compat_from_user, + .compat_to_user = mark_mt_compat_to_user, +#endif + .me = THIS_MODULE, +}; + +static int __init ebt_mark_m_init(void) +{ + return xt_register_match(&ebt_mark_mt_reg); +} + +static void __exit ebt_mark_m_fini(void) +{ + xt_unregister_match(&ebt_mark_mt_reg); +} + +module_init(ebt_mark_m_init); +module_exit(ebt_mark_m_fini); +MODULE_DESCRIPTION("Ebtables: Packet mark match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c new file mode 100644 index 000000000..61bf8f446 --- /dev/null +++ b/net/bridge/netfilter/ebt_nflog.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_nflog + * + * Author: + * Peter Warasin <peter@endian.com> + * + * February, 2008 + * + * Based on: + * xt_NFLOG.c, (C) 2006 by Patrick McHardy <kaber@trash.net> + * ebt_ulog.c, (C) 2004 by Bart De Schuymer <bdschuym@pandora.be> + * + */ + +#include <linux/module.h> +#include 
<linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nflog.h> +#include <net/netfilter/nf_log.h> + +static unsigned int +ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_ULOG; + li.u.ulog.copy_len = info->len; + li.u.ulog.group = info->group; + li.u.ulog.qthreshold = info->threshold; + li.u.ulog.flags = 0; + + nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", info->prefix); + return EBT_CONTINUE; +} + +static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) +{ + struct ebt_nflog_info *info = par->targinfo; + + if (info->flags & ~EBT_NFLOG_MASK) + return -EINVAL; + info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; + return 0; +} + +static struct xt_target ebt_nflog_tg_reg __read_mostly = { + .name = "nflog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_nflog_tg, + .checkentry = ebt_nflog_tg_check, + .targetsize = sizeof(struct ebt_nflog_info), + .me = THIS_MODULE, +}; + +static int __init ebt_nflog_init(void) +{ + return xt_register_target(&ebt_nflog_tg_reg); +} + +static void __exit ebt_nflog_fini(void) +{ + xt_unregister_target(&ebt_nflog_tg_reg); +} + +module_init(ebt_nflog_init); +module_exit(ebt_nflog_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Peter Warasin <peter@endian.com>"); +MODULE_DESCRIPTION("ebtables NFLOG netfilter logging module"); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c new file mode 100644 index 000000000..c9e306119 --- /dev/null +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_pkttype + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2003 + * + */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_pkttype.h> + +static bool +ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_pkttype_info *info = par->matchinfo; + + return (skb->pkt_type == info->pkt_type) ^ info->invert; +} + +static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_pkttype_info *info = par->matchinfo; + + if (info->invert != 0 && info->invert != 1) + return -EINVAL; + /* Allow any pkt_type value */ + return 0; +} + +static struct xt_match ebt_pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_pkttype_mt, + .checkentry = ebt_pkttype_mt_check, + .matchsize = sizeof(struct ebt_pkttype_info), + .me = THIS_MODULE, +}; + +static int __init ebt_pkttype_init(void) +{ + return xt_register_match(&ebt_pkttype_mt_reg); +} + +static void __exit ebt_pkttype_fini(void) +{ + xt_unregister_match(&ebt_pkttype_mt_reg); +} + +module_init(ebt_pkttype_init); +module_exit(ebt_pkttype_fini); +MODULE_DESCRIPTION("Ebtables: Link layer packet type match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c new file mode 100644 index 000000000..307790562 --- /dev/null +++ b/net/bridge/netfilter/ebt_redirect.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_redirect + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ +#include <linux/module.h> 
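+
+/* Typical use, as a minimal illustration (assuming the standard ebtables
+ * userspace tool and a placeholder MAC address): a rule such as
+ *
+ *   ebtables -t nat -A PREROUTING -d <unicast MAC> -j redirect \
+ *            --redirect-target ACCEPT
+ *
+ * rewrites the destination MAC to the bridge device's own address and marks
+ * the frame as PACKET_HOST, so it is delivered to the local host instead of
+ * being forwarded; in the broute table the in-port's address is used instead.
+ */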
+#include <net/sock.h> +#include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> + +static unsigned int +ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_redirect_info *info = par->targinfo; + + if (skb_ensure_writable(skb, 0)) + return EBT_DROP; + + if (xt_hooknum(par) != NF_BR_BROUTING) + /* rcu_read_lock()ed by nf_hook_thresh */ + ether_addr_copy(eth_hdr(skb)->h_dest, + br_port_get_rcu(xt_in(par))->br->dev->dev_addr); + else + ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); + skb->pkt_type = PACKET_HOST; + return info->target; +} + +static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) +{ + const struct ebt_redirect_info *info = par->targinfo; + unsigned int hook_mask; + + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; +} + +static struct xt_target ebt_redirect_tg_reg __read_mostly = { + .name = "redirect", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_BROUTING), + .target = ebt_redirect_tg, + .checkentry = ebt_redirect_tg_check, + .targetsize = sizeof(struct ebt_redirect_info), + .me = THIS_MODULE, +}; + +static int __init ebt_redirect_init(void) +{ + return xt_register_target(&ebt_redirect_tg_reg); +} + +static void __exit ebt_redirect_fini(void) +{ + xt_unregister_target(&ebt_redirect_tg_reg); +} + +module_init(ebt_redirect_init); +module_exit(ebt_redirect_fini); +MODULE_DESCRIPTION("Ebtables: Packet redirection to localhost"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c new file mode 100644 index 000000000..7dfbcdfc3 --- /dev/null +++ b/net/bridge/netfilter/ebt_snat.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_snat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * June, 2002 + * + */ +#include <linux/module.h> +#include <net/sock.h> +#include <linux/if_arp.h> +#include <net/arp.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> + +static unsigned int +ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_nat_info *info = par->targinfo; + + if (skb_ensure_writable(skb, 0)) + return EBT_DROP; + + ether_addr_copy(eth_hdr(skb)->h_source, info->mac); + if (!(info->target & NAT_ARP_BIT) && + eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + const struct arphdr *ap; + struct arphdr _ah; + + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + if (ap == NULL) + return EBT_DROP; + if (ap->ar_hln != ETH_ALEN) + goto out; + if (skb_store_bits(skb, sizeof(_ah), info->mac, ETH_ALEN)) + return EBT_DROP; + } +out: + return info->target | ~EBT_VERDICT_BITS; +} + +static int ebt_snat_tg_check(const struct xt_tgchk_param *par) +{ + const struct ebt_nat_info *info = par->targinfo; + int tmp; + + tmp = info->target | ~EBT_VERDICT_BITS; + if (BASE_CHAIN && tmp == EBT_RETURN) + return -EINVAL; + + if (ebt_invalid_target(tmp)) + return -EINVAL; + tmp = 
info->target | EBT_VERDICT_BITS; + if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) + return -EINVAL; + return 0; +} + +static struct xt_target ebt_snat_tg_reg __read_mostly = { + .name = "snat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), + .target = ebt_snat_tg, + .checkentry = ebt_snat_tg_check, + .targetsize = sizeof(struct ebt_nat_info), + .me = THIS_MODULE, +}; + +static int __init ebt_snat_init(void) +{ + return xt_register_target(&ebt_snat_tg_reg); +} + +static void __exit ebt_snat_fini(void) +{ + xt_unregister_target(&ebt_snat_tg_reg); +} + +module_init(ebt_snat_init); +module_exit(ebt_snat_fini); +MODULE_DESCRIPTION("Ebtables: Source MAC address translation"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c new file mode 100644 index 000000000..8f68afda5 --- /dev/null +++ b/net/bridge/netfilter/ebt_stp.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebt_stp + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * Stephen Hemminger <shemminger@osdl.org> + * + * July, 2003 + */ +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> + +#define BPDU_TYPE_CONFIG 0 + +struct stp_header { + u8 dsap; + u8 ssap; + u8 ctrl; + u8 pid; + u8 vers; + u8 type; +}; + +struct stp_config_pdu { + u8 flags; + u8 root[8]; + u8 root_cost[4]; + u8 sender[8]; + u8 port[2]; + u8 msg_age[2]; + u8 max_age[2]; + u8 hello_time[2]; + u8 forward_delay[2]; +}; + +#define NR16(p) (p[0] << 8 | p[1]) +#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) + +static bool ebt_filter_config(const struct ebt_stp_info *info, + const struct stp_config_pdu *stpc) +{ + const struct ebt_stp_config_info *c; + u16 v16; + u32 v32; + + c = &info->config; + if ((info->bitmask & EBT_STP_FLAGS) && + NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags)) + return false; + if (info->bitmask & EBT_STP_ROOTPRIO) { + v16 = NR16(stpc->root); + if (NF_INVF(info, EBT_STP_ROOTPRIO, + v16 < c->root_priol || v16 > c->root_priou)) + return false; + } + if (info->bitmask & EBT_STP_ROOTADDR) { + if (NF_INVF(info, EBT_STP_ROOTADDR, + !ether_addr_equal_masked(&stpc->root[2], + c->root_addr, + c->root_addrmsk))) + return false; + } + if (info->bitmask & EBT_STP_ROOTCOST) { + v32 = NR32(stpc->root_cost); + if (NF_INVF(info, EBT_STP_ROOTCOST, + v32 < c->root_costl || v32 > c->root_costu)) + return false; + } + if (info->bitmask & EBT_STP_SENDERPRIO) { + v16 = NR16(stpc->sender); + if (NF_INVF(info, EBT_STP_SENDERPRIO, + v16 < c->sender_priol || v16 > c->sender_priou)) + return false; + } + if (info->bitmask & EBT_STP_SENDERADDR) { + if (NF_INVF(info, EBT_STP_SENDERADDR, + !ether_addr_equal_masked(&stpc->sender[2], + c->sender_addr, + c->sender_addrmsk))) + return false; + } + if (info->bitmask & EBT_STP_PORT) { + v16 = NR16(stpc->port); + if (NF_INVF(info, EBT_STP_PORT, + v16 < c->portl || v16 > c->portu)) + return false; + } + if (info->bitmask & EBT_STP_MSGAGE) { + v16 = NR16(stpc->msg_age); + if (NF_INVF(info, EBT_STP_MSGAGE, + v16 < c->msg_agel || v16 > c->msg_ageu)) + return false; + } + if (info->bitmask & EBT_STP_MAXAGE) { + v16 = NR16(stpc->max_age); + if (NF_INVF(info, EBT_STP_MAXAGE, + v16 < c->max_agel || v16 > c->max_ageu)) + return false; + } + if (info->bitmask & EBT_STP_HELLOTIME) { + v16 = NR16(stpc->hello_time); + if (NF_INVF(info, 
EBT_STP_HELLOTIME, + v16 < c->hello_timel || v16 > c->hello_timeu)) + return false; + } + if (info->bitmask & EBT_STP_FWDD) { + v16 = NR16(stpc->forward_delay); + if (NF_INVF(info, EBT_STP_FWDD, + v16 < c->forward_delayl || v16 > c->forward_delayu)) + return false; + } + return true; +} + +static bool +ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_stp_info *info = par->matchinfo; + const struct stp_header *sp; + struct stp_header _stph; + const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + + sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); + if (sp == NULL) + return false; + + /* The stp code only considers these */ + if (memcmp(sp, header, sizeof(header))) + return false; + + if ((info->bitmask & EBT_STP_TYPE) && + NF_INVF(info, EBT_STP_TYPE, info->type != sp->type)) + return false; + + if (sp->type == BPDU_TYPE_CONFIG && + info->bitmask & EBT_STP_CONFIG_MASK) { + const struct stp_config_pdu *st; + struct stp_config_pdu _stpc; + + st = skb_header_pointer(skb, sizeof(_stph), + sizeof(_stpc), &_stpc); + if (st == NULL) + return false; + return ebt_filter_config(info, st); + } + return true; +} + +static int ebt_stp_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_stp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; + + if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || + !(info->bitmask & EBT_STP_MASK)) + return -EINVAL; + /* Make sure the match only receives stp frames */ + if (!par->nft_compat && + (!ether_addr_equal(e->destmac, eth_stp_addr) || + !(e->bitmask & EBT_DESTMAC) || + !is_broadcast_ether_addr(e->destmsk))) + return -EINVAL; + + return 0; +} + +static struct xt_match ebt_stp_mt_reg __read_mostly = { + .name = "stp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_stp_mt, + .checkentry = ebt_stp_mt_check, + .matchsize = sizeof(struct ebt_stp_info), + .me = THIS_MODULE, +}; + +static int __init ebt_stp_init(void) +{ + return xt_register_match(&ebt_stp_mt_reg); +} + +static void __exit ebt_stp_fini(void) +{ + xt_unregister_match(&ebt_stp_mt_reg); +} + +module_init(ebt_stp_init); +module_exit(ebt_stp_fini); +MODULE_DESCRIPTION("Ebtables: Spanning Tree Protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c new file mode 100644 index 000000000..80ede370a --- /dev/null +++ b/net/bridge/netfilter/ebt_vlan.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Description: EBTables 802.1Q match extension kernelspace module. 
+ * Authors: Nick Fedchik <nick@fedchik.org.ua> + * Bart De Schuymer <bdschuym@pandora.be> + */ + +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_vlan.h> + +#define MODULE_VERS "0.6" + +MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); +MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match"); +MODULE_LICENSE("GPL"); + +#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } + +static bool +ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_vlan_info *info = par->matchinfo; + + unsigned short TCI; /* Whole TCI, given from parsed frame */ + unsigned short id; /* VLAN ID, given from frame TCI */ + unsigned char prio; /* user_priority, given from frame TCI */ + /* VLAN encapsulated Type/Length field, given from orig frame */ + __be16 encap; + + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); + encap = skb->protocol; + } else { + const struct vlan_hdr *fp; + struct vlan_hdr _frame; + + fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); + if (fp == NULL) + return false; + + TCI = ntohs(fp->h_vlan_TCI); + encap = fp->h_vlan_encapsulated_proto; + } + + /* Tag Control Information (TCI) consists of the following elements: + * - User_priority. The user_priority field is three bits in length, + * interpreted as a binary number. + * - Canonical Format Indicator (CFI). The Canonical Format Indicator + * (CFI) is a single bit flag value. Currently ignored. + * - VLAN Identifier (VID). The VID is encoded as + * an unsigned binary number. + */ + id = TCI & VLAN_VID_MASK; + prio = (TCI >> 13) & 0x7; + + /* Checking VLAN Identifier (VID) */ + if (GET_BITMASK(EBT_VLAN_ID)) + EXIT_ON_MISMATCH(id, EBT_VLAN_ID); + + /* Checking user_priority */ + if (GET_BITMASK(EBT_VLAN_PRIO)) + EXIT_ON_MISMATCH(prio, EBT_VLAN_PRIO); + + /* Checking Encapsulated Proto (Length/Type) field */ + if (GET_BITMASK(EBT_VLAN_ENCAP)) + EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); + + return true; +} + +static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) +{ + struct ebt_vlan_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; + + /* Is it 802.1Q frame checked? */ + if (e->ethproto != htons(ETH_P_8021Q)) { + pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n", + ntohs(e->ethproto)); + return -EINVAL; + } + + /* Check for bitmask range + * True if even one bit is out of mask + */ + if (info->bitmask & ~EBT_VLAN_MASK) { + pr_debug("bitmask %2X is out of mask (%2X)\n", + info->bitmask, EBT_VLAN_MASK); + return -EINVAL; + } + + /* Check for inversion flags range */ + if (info->invflags & ~EBT_VLAN_MASK) { + pr_debug("inversion flags %2X is out of mask (%2X)\n", + info->invflags, EBT_VLAN_MASK); + return -EINVAL; + } + + /* Reserved VLAN ID (VID) values + * ----------------------------- + * 0 - The null VLAN ID. + * 1 - The default Port VID (PVID) + * 0x0FFF - Reserved for implementation use. + * if_vlan.h: VLAN_N_VID 4096. + */ + if (GET_BITMASK(EBT_VLAN_ID)) { + if (!!info->id) { /* if id!=0 => check vid range */ + if (info->id > VLAN_N_VID) { + pr_debug("id %d is out of range (1-4096)\n", + info->id); + return -EINVAL; + } + /* Note: This is valid VLAN-tagged frame point. 
+ * Any value of user_priority are acceptable, + * but should be ignored according to 802.1Q Std. + * So we just drop the prio flag. + */ + info->bitmask &= ~EBT_VLAN_PRIO; + } + /* Else, id=0 (null VLAN ID) => user_priority range (any?) */ + } + + if (GET_BITMASK(EBT_VLAN_PRIO)) { + if ((unsigned char) info->prio > 7) { + pr_debug("prio %d is out of range (0-7)\n", + info->prio); + return -EINVAL; + } + } + /* Check for encapsulated proto range - it is possible to be + * any value for u_short range. + * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS + */ + if (GET_BITMASK(EBT_VLAN_ENCAP)) { + if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { + pr_debug("encap frame length %d is less than " + "minimal\n", ntohs(info->encap)); + return -EINVAL; + } + } + + return 0; +} + +static struct xt_match ebt_vlan_mt_reg __read_mostly = { + .name = "vlan", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_vlan_mt, + .checkentry = ebt_vlan_mt_check, + .matchsize = sizeof(struct ebt_vlan_info), + .me = THIS_MODULE, +}; + +static int __init ebt_vlan_init(void) +{ + pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n"); + return xt_register_match(&ebt_vlan_mt_reg); +} + +static void __exit ebt_vlan_fini(void) +{ + xt_unregister_match(&ebt_vlan_mt_reg); +} + +module_init(ebt_vlan_init); +module_exit(ebt_vlan_fini); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c new file mode 100644 index 000000000..57f91efce --- /dev/null +++ b/net/bridge/netfilter/ebtable_broute.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebtable_broute + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + * This table lets you choose between routing and bridging for frames + * entering on a bridge enslaved nic. This table is traversed before any + * other ebtables table. See net/bridge/br_input.c. + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/module.h> +#include <linux/if_bridge.h> + +#include "../br_private.h" + +/* EBT_ACCEPT means the frame will be bridged + * EBT_DROP means the frame will be routed + */ +static struct ebt_entries initial_chain = { + .name = "BROUTING", + .policy = EBT_ACCEPT, +}; + +static struct ebt_replace_kernel initial_table = { + .name = "broute", + .valid_hooks = 1 << NF_BR_BROUTING, + .entries_size = sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_BROUTING] = &initial_chain, + }, + .entries = (char *)&initial_chain, +}; + +static const struct ebt_table broute_table = { + .name = "broute", + .table = &initial_table, + .valid_hooks = 1 << NF_BR_BROUTING, + .me = THIS_MODULE, +}; + +static unsigned int ebt_broute(void *priv, struct sk_buff *skb, + const struct nf_hook_state *s) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + struct nf_hook_state state; + unsigned char *dest; + int ret; + + if (!p || p->state != BR_STATE_FORWARDING) + return NF_ACCEPT; + + nf_hook_state_init(&state, NF_BR_BROUTING, + NFPROTO_BRIDGE, s->in, NULL, NULL, + s->net, NULL); + + ret = ebt_do_table(skb, &state, state.net->xt.broute_table); + + if (ret != NF_DROP) + return ret; + + /* DROP in ebtables -t broute means that the + * skb should be routed, not bridged. + * This is awkward, but can't be changed for compatibility + * reasons. + * + * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. 
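+ *
+ * For example (illustrative rule, placeholder MAC): with
+ *   ebtables -t broute -A BROUTING -d <router MAC> -j DROP
+ * frames to that address make ebt_do_table() return NF_DROP here; this
+ * hook then returns NF_ACCEPT with br_netfilter_broute set, and br_input
+ * hands the frame to the upper layers for routing instead of bridging it.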
+ */ + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; + + /* undo PACKET_HOST mangling done in br_input in case the dst + * address matches the logical bridge but not the port. + */ + dest = eth_hdr(skb)->h_dest; + if (skb->pkt_type == PACKET_HOST && + !ether_addr_equal(skb->dev->dev_addr, dest) && + ether_addr_equal(p->br->dev->dev_addr, dest)) + skb->pkt_type = PACKET_OTHERHOST; + + return NF_ACCEPT; +} + +static const struct nf_hook_ops ebt_ops_broute = { + .hook = ebt_broute, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_FIRST, +}; + +static int __net_init broute_net_init(struct net *net) +{ + return ebt_register_table(net, &broute_table, &ebt_ops_broute, + &net->xt.broute_table); +} + +static void __net_exit broute_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute); +} + +static void __net_exit broute_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.broute_table); +} + +static struct pernet_operations broute_net_ops = { + .init = broute_net_init, + .exit = broute_net_exit, + .pre_exit = broute_net_pre_exit, +}; + +static int __init ebtable_broute_init(void) +{ + return register_pernet_subsys(&broute_net_ops); +} + +static void __exit ebtable_broute_fini(void) +{ + unregister_pernet_subsys(&broute_net_ops); +} + +module_init(ebtable_broute_init); +module_exit(ebtable_broute_fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c new file mode 100644 index 000000000..7f2e620f4 --- /dev/null +++ b/net/bridge/netfilter/ebtable_filter.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebtable_filter + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <uapi/linux/netfilter_bridge.h> +#include <linux/module.h> + +#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ + (1 << NF_BR_LOCAL_OUT)) + +static struct ebt_entries initial_chains[] = { + { + .name = "INPUT", + .policy = EBT_ACCEPT, + }, + { + .name = "FORWARD", + .policy = EBT_ACCEPT, + }, + { + .name = "OUTPUT", + .policy = EBT_ACCEPT, + }, +}; + +static struct ebt_replace_kernel initial_table = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .entries_size = 3 * sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_LOCAL_IN] = &initial_chains[0], + [NF_BR_FORWARD] = &initial_chains[1], + [NF_BR_LOCAL_OUT] = &initial_chains[2], + }, + .entries = (char *)initial_chains, +}; + +static const struct ebt_table frame_filter = { + .name = "filter", + .table = &initial_table, + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, +}; + +static unsigned int +ebt_in_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, state->net->xt.frame_filter); +} + +static unsigned int +ebt_out_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, state->net->xt.frame_filter); +} + +static const struct nf_hook_ops ebt_ops_filter[] = { + { + .hook = ebt_in_hook, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_FILTER_BRIDGED, + }, + { + .hook = ebt_in_hook, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_FILTER_BRIDGED, + }, + { + .hook = ebt_out_hook, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_FILTER_OTHER, + }, +}; + +static int __net_init 
frame_filter_net_init(struct net *net) +{ + return ebt_register_table(net, &frame_filter, ebt_ops_filter, + &net->xt.frame_filter); +} + +static void __net_exit frame_filter_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter); +} + +static void __net_exit frame_filter_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.frame_filter); +} + +static struct pernet_operations frame_filter_net_ops = { + .init = frame_filter_net_init, + .exit = frame_filter_net_exit, + .pre_exit = frame_filter_net_pre_exit, +}; + +static int __init ebtable_filter_init(void) +{ + return register_pernet_subsys(&frame_filter_net_ops); +} + +static void __exit ebtable_filter_fini(void) +{ + unregister_pernet_subsys(&frame_filter_net_ops); +} + +module_init(ebtable_filter_init); +module_exit(ebtable_filter_fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c new file mode 100644 index 000000000..1743a1054 --- /dev/null +++ b/net/bridge/netfilter/ebtable_nat.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ebtable_nat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <uapi/linux/netfilter_bridge.h> +#include <linux/module.h> + +#define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ + (1 << NF_BR_POST_ROUTING)) + +static struct ebt_entries initial_chains[] = { + { + .name = "PREROUTING", + .policy = EBT_ACCEPT, + }, + { + .name = "OUTPUT", + .policy = EBT_ACCEPT, + }, + { + .name = "POSTROUTING", + .policy = EBT_ACCEPT, + } +}; + +static struct ebt_replace_kernel initial_table = { + .name = "nat", + .valid_hooks = NAT_VALID_HOOKS, + .entries_size = 3 * sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_PRE_ROUTING] = &initial_chains[0], + [NF_BR_LOCAL_OUT] = &initial_chains[1], + [NF_BR_POST_ROUTING] = &initial_chains[2], + }, + .entries = (char *)initial_chains, +}; + +static const struct ebt_table frame_nat = { + .name = "nat", + .table = &initial_table, + .valid_hooks = NAT_VALID_HOOKS, + .me = THIS_MODULE, +}; + +static unsigned int +ebt_nat_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, state->net->xt.frame_nat); +} + +static unsigned int +ebt_nat_out(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return ebt_do_table(skb, state, state->net->xt.frame_nat); +} + +static const struct nf_hook_ops ebt_ops_nat[] = { + { + .hook = ebt_nat_out, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_NAT_DST_OTHER, + }, + { + .hook = ebt_nat_out, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_NAT_SRC, + }, + { + .hook = ebt_nat_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_NAT_DST_BRIDGED, + }, +}; + +static int __net_init frame_nat_net_init(struct net *net) +{ + return ebt_register_table(net, &frame_nat, ebt_ops_nat, + &net->xt.frame_nat); +} + +static void __net_exit frame_nat_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat); +} + +static void __net_exit frame_nat_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.frame_nat); +} + +static struct pernet_operations frame_nat_net_ops = { + .init = frame_nat_net_init, + .exit = frame_nat_net_exit, + .pre_exit = frame_nat_net_pre_exit, +}; + +static int __init 
ebtable_nat_init(void) +{ + return register_pernet_subsys(&frame_nat_net_ops); +} + +static void __exit ebtable_nat_fini(void) +{ + unregister_pernet_subsys(&frame_nat_net_ops); +} + +module_init(ebtable_nat_init); +module_exit(ebtable_nat_fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c new file mode 100644 index 000000000..bab14186f --- /dev/null +++ b/net/bridge/netfilter/ebtables.c @@ -0,0 +1,2461 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ebtables + * + * Author: + * Bart De Schuymer <bdschuym@pandora.be> + * + * ebtables.c,v 2.0, July, 2002 + * + * This code is strongly inspired by the iptables code which is + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kmod.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/smp.h> +#include <linux/cpumask.h> +#include <linux/audit.h> +#include <net/sock.h> +/* needed for logical [in,out]-dev filtering */ +#include "../br_private.h" + +/* Each cpu has its own set of counters, so there is no need for write_lock in + * the softirq + * For reading or updating the counters, the user context needs to + * get a write_lock + */ + +/* The size of each set of counters is altered to get cache alignment */ +#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +#define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) +#define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ + COUNTER_OFFSET(n) * cpu)) + + + +static DEFINE_MUTEX(ebt_mutex); + +#ifdef CONFIG_COMPAT +static void ebt_standard_compat_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v >= 0) + v += xt_compat_calc_jump(NFPROTO_BRIDGE, v); + memcpy(dst, &v, sizeof(v)); +} + +static int ebt_standard_compat_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv >= 0) + cv -= xt_compat_calc_jump(NFPROTO_BRIDGE, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; +} +#endif + + +static struct xt_target ebt_standard_target = { + .name = "standard", + .revision = 0, + .family = NFPROTO_BRIDGE, + .targetsize = sizeof(int), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = ebt_standard_compat_from_user, + .compat_to_user = ebt_standard_compat_to_user, +#endif +}; + +static inline int +ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, + struct xt_action_param *par) +{ + par->target = w->u.watcher; + par->targinfo = w->data; + w->u.watcher->target(skb, par); + /* watchers don't give a verdict */ + return 0; +} + +static inline int +ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb, + struct xt_action_param *par) +{ + par->match = m->u.match; + par->matchinfo = m->data; + return !m->u.match->match(skb, par); +} + +static inline int +ebt_dev_check(const char *entry, const struct net_device *device) +{ + int i = 0; + const char *devname; + + if (*entry == '\0') + return 0; + if (!device) + return 1; + devname = device->name; + /* 1 is the wildcard token */ + while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) + i++; + return devname[i] != entry[i] && entry[i] != 1; +} + +/* process standard matches */ +static inline int +ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out) +{ + const struct ethhdr *h = eth_hdr(skb); + const struct net_bridge_port *p; + __be16 ethproto; + + if (skb_vlan_tag_present(skb)) + ethproto = htons(ETH_P_8021Q); + else + ethproto = h->h_proto; + + if (e->bitmask & EBT_802_3) { + if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto))) + return 1; + } else if (!(e->bitmask & EBT_NOPROTO) && + NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto)) + return 1; + + if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in))) + return 1; + if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) + return 1; + /* rcu_read_lock()ed by nf_hook_thresh */ + if (in && (p = br_port_get_rcu(in)) != NULL && + NF_INVF(e, EBT_ILOGICALIN, + ebt_dev_check(e->logical_in, p->br->dev))) + return 1; + if (out && (p = br_port_get_rcu(out)) != NULL && + NF_INVF(e, EBT_ILOGICALOUT, + ebt_dev_check(e->logical_out, p->br->dev))) + return 1; + + if (e->bitmask & EBT_SOURCEMAC) { + if (NF_INVF(e, EBT_ISOURCE, + !ether_addr_equal_masked(h->h_source, e->sourcemac, + e->sourcemsk))) + return 1; + } + if (e->bitmask & EBT_DESTMAC) { + if (NF_INVF(e, EBT_IDEST, + !ether_addr_equal_masked(h->h_dest, e->destmac, + e->destmsk))) + return 1; + } + return 0; +} + +static inline +struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +static inline const struct ebt_entry_target * +ebt_get_target_c(const struct ebt_entry *e) +{ + return ebt_get_target((struct ebt_entry *)e); +} + +/* Do some firewalling */ +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) +{ + unsigned int hook = state->hook; + int i, nentries; + struct ebt_entry *point; + struct ebt_counter *counter_base, *cb_base; + const struct ebt_entry_target *t; + int verdict, sp = 0; + struct ebt_chainstack *cs; + struct ebt_entries *chaininfo; + const char *base; + const struct ebt_table_info *private; + struct xt_action_param acpar; + + acpar.state = state; + acpar.hotdrop = false; + + read_lock_bh(&table->lock); + private = table->private; + cb_base = COUNTER_BASE(private->counters, private->nentries, + smp_processor_id()); + if 
(private->chainstack) + cs = private->chainstack[smp_processor_id()]; + else + cs = NULL; + chaininfo = private->hook_entry[hook]; + nentries = private->hook_entry[hook]->nentries; + point = (struct ebt_entry *)(private->hook_entry[hook]->data); + counter_base = cb_base + private->hook_entry[hook]->counter_offset; + /* base for chain jumps */ + base = private->entries; + i = 0; + while (i < nentries) { + if (ebt_basic_match(point, skb, state->in, state->out)) + goto letscontinue; + + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) + goto letscontinue; + if (acpar.hotdrop) { + read_unlock_bh(&table->lock); + return NF_DROP; + } + + ADD_COUNTER(*(counter_base + i), skb->len, 1); + + /* these should only watch: not modify, nor tell us + * what to do with the packet + */ + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); + + t = ebt_get_target_c(point); + /* standard target */ + if (!t->u.target->target) + verdict = ((struct ebt_standard_target *)t)->verdict; + else { + acpar.target = t->u.target; + acpar.targinfo = t->data; + verdict = t->u.target->target(skb, &acpar); + } + if (verdict == EBT_ACCEPT) { + read_unlock_bh(&table->lock); + return NF_ACCEPT; + } + if (verdict == EBT_DROP) { + read_unlock_bh(&table->lock); + return NF_DROP; + } + if (verdict == EBT_RETURN) { +letsreturn: + if (WARN(sp == 0, "RETURN on base chain")) { + /* act like this is EBT_CONTINUE */ + goto letscontinue; + } + + sp--; + /* put all the local variables right */ + i = cs[sp].n; + chaininfo = cs[sp].chaininfo; + nentries = chaininfo->nentries; + point = cs[sp].e; + counter_base = cb_base + + chaininfo->counter_offset; + continue; + } + if (verdict == EBT_CONTINUE) + goto letscontinue; + + if (WARN(verdict < 0, "bogus standard verdict\n")) { + read_unlock_bh(&table->lock); + return NF_DROP; + } + + /* jump to a udc */ + cs[sp].n = i + 1; + cs[sp].chaininfo = chaininfo; + cs[sp].e = ebt_next_entry(point); + i = 0; + chaininfo = (struct ebt_entries *) (base + verdict); + + if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) { + read_unlock_bh(&table->lock); + return NF_DROP; + } + + nentries = chaininfo->nentries; + point = (struct ebt_entry *)chaininfo->data; + counter_base = cb_base + chaininfo->counter_offset; + sp++; + continue; +letscontinue: + point = ebt_next_entry(point); + i++; + } + + /* I actually like this :) */ + if (chaininfo->policy == EBT_RETURN) + goto letsreturn; + if (chaininfo->policy == EBT_ACCEPT) { + read_unlock_bh(&table->lock); + return NF_ACCEPT; + } + read_unlock_bh(&table->lock); + return NF_DROP; +} + +/* If it succeeds, returns element and locks mutex */ +static inline void * +find_inlist_lock_noload(struct list_head *head, const char *name, int *error, + struct mutex *mutex) +{ + struct { + struct list_head list; + char name[EBT_FUNCTION_MAXNAMELEN]; + } *e; + + mutex_lock(mutex); + list_for_each_entry(e, head, list) { + if (strcmp(e->name, name) == 0) + return e; + } + *error = -ENOENT; + mutex_unlock(mutex); + return NULL; +} + +static void * +find_inlist_lock(struct list_head *head, const char *name, const char *prefix, + int *error, struct mutex *mutex) +{ + return try_then_request_module( + find_inlist_lock_noload(head, name, error, mutex), + "%s%s", prefix, name); +} + +static inline struct ebt_table * +find_table_lock(struct net *net, const char *name, int *error, + struct mutex *mutex) +{ + return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name, + "ebtable_", error, mutex); +} + +static inline void ebt_free_table_info(struct ebt_table_info 
*info) +{ + int i; + + if (info->chainstack) { + for_each_possible_cpu(i) + vfree(info->chainstack[i]); + vfree(info->chainstack); + } +} +static inline int +ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *cnt) +{ + const struct ebt_entry *e = par->entryinfo; + struct xt_match *match; + size_t left = ((char *)e + e->watchers_offset) - (char *)m; + int ret; + + if (left < sizeof(struct ebt_entry_match) || + left - sizeof(struct ebt_entry_match) < m->match_size) + return -EINVAL; + + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); + if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) { + if (!IS_ERR(match)) + module_put(match->me); + request_module("ebt_%s", m->u.name); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); + } + if (IS_ERR(match)) + return PTR_ERR(match); + m->u.match = match; + + par->match = match; + par->matchinfo = m->data; + ret = xt_check_match(par, m->match_size, + ntohs(e->ethproto), e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(match->me); + return ret; + } + + (*cnt)++; + return 0; +} + +static inline int +ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, + unsigned int *cnt) +{ + const struct ebt_entry *e = par->entryinfo; + struct xt_target *watcher; + size_t left = ((char *)e + e->target_offset) - (char *)w; + int ret; + + if (left < sizeof(struct ebt_entry_watcher) || + left - sizeof(struct ebt_entry_watcher) < w->watcher_size) + return -EINVAL; + + watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); + if (IS_ERR(watcher)) + return PTR_ERR(watcher); + + if (watcher->family != NFPROTO_BRIDGE) { + module_put(watcher->me); + return -ENOENT; + } + + w->u.watcher = watcher; + + par->target = watcher; + par->targinfo = w->data; + ret = xt_check_target(par, w->watcher_size, + ntohs(e->ethproto), e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(watcher->me); + return ret; + } + + (*cnt)++; + return 0; +} + +static int ebt_verify_pointers(const struct ebt_replace *repl, + struct ebt_table_info *newinfo) +{ + unsigned int limit = repl->entries_size; + unsigned int valid_hooks = repl->valid_hooks; + unsigned int offset = 0; + int i; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) + newinfo->hook_entry[i] = NULL; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; + + while (offset < limit) { + size_t left = limit - offset; + struct ebt_entry *e = (void *)newinfo->entries + offset; + + if (left < sizeof(unsigned int)) + break; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if ((char __user *)repl->hook_entry[i] == + repl->entries + offset) + break; + } + + if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { + if (e->bitmask != 0) { + /* we make userspace set this right, + * so there is no misunderstanding + */ + return -EINVAL; + } + if (i != NF_BR_NUMHOOKS) + newinfo->hook_entry[i] = (struct ebt_entries *)e; + if (left < sizeof(struct ebt_entries)) + break; + offset += sizeof(struct ebt_entries); + } else { + if (left < sizeof(struct ebt_entry)) + break; + if (left < e->next_offset) + break; + if (e->next_offset < sizeof(struct ebt_entry)) + return -EINVAL; + offset += e->next_offset; + } + } + if (offset != limit) + return -EINVAL; + + /* check if all valid hooks have a chain */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (!newinfo->hook_entry[i] && + (valid_hooks & (1 << i))) + return -EINVAL; + } + return 0; +} + +/* this one is very careful, as it is the 
first function + * to parse the userspace data + */ +static inline int +ebt_check_entry_size_and_hooks(const struct ebt_entry *e, + const struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) +{ + int i; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((void *)e == (void *)newinfo->hook_entry[i]) + break; + } + /* beginning of a new chain + * if i == NF_BR_NUMHOOKS it must be a user defined chain + */ + if (i != NF_BR_NUMHOOKS || !e->bitmask) { + /* this checks if the previous chain has as many entries + * as it said it has + */ + if (*n != *cnt) + return -EINVAL; + + if (((struct ebt_entries *)e)->policy != EBT_DROP && + ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { + /* only RETURN from udc */ + if (i != NF_BR_NUMHOOKS || + ((struct ebt_entries *)e)->policy != EBT_RETURN) + return -EINVAL; + } + if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ + (*udc_cnt)++; + if (((struct ebt_entries *)e)->counter_offset != *totalcnt) + return -EINVAL; + *n = ((struct ebt_entries *)e)->nentries; + *cnt = 0; + return 0; + } + /* a plain old entry, heh */ + if (sizeof(struct ebt_entry) > e->watchers_offset || + e->watchers_offset > e->target_offset || + e->target_offset >= e->next_offset) + return -EINVAL; + + /* this is not checked anywhere else */ + if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) + return -EINVAL; + + (*cnt)++; + (*totalcnt)++; + return 0; +} + +struct ebt_cl_stack { + struct ebt_chainstack cs; + int from; + unsigned int hookmask; +}; + +/* We need these positions to check that the jumps to a different part of the + * entries is a jump to the beginning of a new chain. + */ +static inline int +ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, + unsigned int *n, struct ebt_cl_stack *udc) +{ + int i; + + /* we're only interested in chain starts */ + if (e->bitmask) + return 0; + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (newinfo->hook_entry[i] == (struct ebt_entries *)e) + break; + } + /* only care about udc */ + if (i != NF_BR_NUMHOOKS) + return 0; + + udc[*n].cs.chaininfo = (struct ebt_entries *)e; + /* these initialisations are depended on later in check_chainloops() */ + udc[*n].cs.n = 0; + udc[*n].hookmask = 0; + + (*n)++; + return 0; +} + +static inline int +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) +{ + struct xt_mtdtor_param par; + + if (i && (*i)-- == 0) + return 1; + + par.net = net; + par.match = m->u.match; + par.matchinfo = m->data; + par.family = NFPROTO_BRIDGE; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); + return 0; +} + +static inline int +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) +{ + struct xt_tgdtor_param par; + + if (i && (*i)-- == 0) + return 1; + + par.net = net; + par.target = w->u.watcher; + par.targinfo = w->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); + return 0; +} + +static inline int +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) +{ + struct xt_tgdtor_param par; + struct ebt_entry_target *t; + + if (e->bitmask == 0) + return 0; + /* we're done */ + if (cnt && (*cnt)-- == 0) + return 1; + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); + t = ebt_get_target(e); + + par.net = net; + par.target = t->u.target; + par.targinfo = t->data; + 
par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); + return 0; +} + +static inline int +ebt_check_entry(struct ebt_entry *e, struct net *net, + const struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, + struct ebt_cl_stack *cl_s, unsigned int udc_cnt) +{ + struct ebt_entry_target *t; + struct xt_target *target; + unsigned int i, j, hook = 0, hookmask = 0; + size_t gap; + int ret; + struct xt_mtchk_param mtpar; + struct xt_tgchk_param tgpar; + + /* don't mess with the struct ebt_entries */ + if (e->bitmask == 0) + return 0; + + if (e->bitmask & ~EBT_F_MASK) + return -EINVAL; + + if (e->invflags & ~EBT_INV_MASK) + return -EINVAL; + + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) + return -EINVAL; + + /* what hook do we belong to? */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (!newinfo->hook_entry[i]) + continue; + if ((char *)newinfo->hook_entry[i] < (char *)e) + hook = i; + else + break; + } + /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on + * a base chain + */ + if (i < NF_BR_NUMHOOKS) + hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); + else { + for (i = 0; i < udc_cnt; i++) + if ((char *)(cl_s[i].cs.chaininfo) > (char *)e) + break; + if (i == 0) + hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); + else + hookmask = cl_s[i - 1].hookmask; + } + i = 0; + + memset(&mtpar, 0, sizeof(mtpar)); + memset(&tgpar, 0, sizeof(tgpar)); + mtpar.net = tgpar.net = net; + mtpar.table = tgpar.table = name; + mtpar.entryinfo = tgpar.entryinfo = e; + mtpar.hook_mask = tgpar.hook_mask = hookmask; + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); + if (ret != 0) + goto cleanup_matches; + j = 0; + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); + if (ret != 0) + goto cleanup_watchers; + t = ebt_get_target(e); + gap = e->next_offset - e->target_offset; + + target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); + if (IS_ERR(target)) { + ret = PTR_ERR(target); + goto cleanup_watchers; + } + + /* Reject UNSPEC, xtables verdicts/return values are incompatible */ + if (target->family != NFPROTO_BRIDGE) { + module_put(target->me); + ret = -ENOENT; + goto cleanup_watchers; + } + + t->u.target = target; + if (t->u.target == &ebt_standard_target) { + if (gap < sizeof(struct ebt_standard_target)) { + ret = -EFAULT; + goto cleanup_watchers; + } + if (((struct ebt_standard_target *)t)->verdict < + -NUM_STANDARD_TARGETS) { + ret = -EFAULT; + goto cleanup_watchers; + } + } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { + module_put(t->u.target->me); + ret = -EFAULT; + goto cleanup_watchers; + } + + tgpar.target = target; + tgpar.targinfo = t->data; + ret = xt_check_target(&tgpar, t->target_size, + ntohs(e->ethproto), e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(target->me); + goto cleanup_watchers; + } + (*cnt)++; + return 0; +cleanup_watchers: + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); +cleanup_matches: + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); + return ret; +} + +/* checks for loops and sets the hook mask for udc + * the hook mask for udc tells us from which base chains the udc can be + * accessed. 
This mask is a parameter to the check() functions of the extensions + */ +static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, + unsigned int udc_cnt, unsigned int hooknr, char *base) +{ + int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; + const struct ebt_entry *e = (struct ebt_entry *)chain->data; + const struct ebt_entry_target *t; + + while (pos < nentries || chain_nr != -1) { + /* end of udc, go back one 'recursion' step */ + if (pos == nentries) { + /* put back values of the time when this chain was called */ + e = cl_s[chain_nr].cs.e; + if (cl_s[chain_nr].from != -1) + nentries = + cl_s[cl_s[chain_nr].from].cs.chaininfo->nentries; + else + nentries = chain->nentries; + pos = cl_s[chain_nr].cs.n; + /* make sure we won't see a loop that isn't one */ + cl_s[chain_nr].cs.n = 0; + chain_nr = cl_s[chain_nr].from; + if (pos == nentries) + continue; + } + t = ebt_get_target_c(e); + if (strcmp(t->u.name, EBT_STANDARD_TARGET)) + goto letscontinue; + if (e->target_offset + sizeof(struct ebt_standard_target) > + e->next_offset) + return -1; + + verdict = ((struct ebt_standard_target *)t)->verdict; + if (verdict >= 0) { /* jump to another chain */ + struct ebt_entries *hlp2 = + (struct ebt_entries *)(base + verdict); + for (i = 0; i < udc_cnt; i++) + if (hlp2 == cl_s[i].cs.chaininfo) + break; + /* bad destination or loop */ + if (i == udc_cnt) + return -1; + + if (cl_s[i].cs.n) + return -1; + + if (cl_s[i].hookmask & (1 << hooknr)) + goto letscontinue; + /* this can't be 0, so the loop test is correct */ + cl_s[i].cs.n = pos + 1; + pos = 0; + cl_s[i].cs.e = ebt_next_entry(e); + e = (struct ebt_entry *)(hlp2->data); + nentries = hlp2->nentries; + cl_s[i].from = chain_nr; + chain_nr = i; + /* this udc is accessible from the base chain for hooknr */ + cl_s[i].hookmask |= (1 << hooknr); + continue; + } +letscontinue: + e = ebt_next_entry(e); + pos++; + } + return 0; +} + +/* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ +static int translate_table(struct net *net, const char *name, + struct ebt_table_info *newinfo) +{ + unsigned int i, j, k, udc_cnt; + int ret; + struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */ + + i = 0; + while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) + i++; + if (i == NF_BR_NUMHOOKS) + return -EINVAL; + + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) + return -EINVAL; + + /* make sure chains are ordered after each other in same order + * as their corresponding hooks + */ + for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { + if (!newinfo->hook_entry[j]) + continue; + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) + return -EINVAL; + + i = j; + } + + /* do some early checkings and initialize some things */ + i = 0; /* holds the expected nr. of entries for the chain */ + j = 0; /* holds the up to now counted entries for the chain */ + k = 0; /* holds the total nr. of entries, should equal + * newinfo->nentries afterwards + */ + udc_cnt = 0; /* will hold the nr. 
of user defined chains (udc) */ + ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_check_entry_size_and_hooks, newinfo, + &i, &j, &k, &udc_cnt); + + if (ret != 0) + return ret; + + if (i != j) + return -EINVAL; + + if (k != newinfo->nentries) + return -EINVAL; + + /* get the location of the udc, put them in an array + * while we're at it, allocate the chainstack + */ + if (udc_cnt) { + /* this will get free'd in do_replace()/ebt_register_table() + * if an error occurs + */ + newinfo->chainstack = + vmalloc(array_size(nr_cpu_ids, + sizeof(*(newinfo->chainstack)))); + if (!newinfo->chainstack) + return -ENOMEM; + for_each_possible_cpu(i) { + newinfo->chainstack[i] = + vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0])))); + if (!newinfo->chainstack[i]) { + while (i) + vfree(newinfo->chainstack[--i]); + vfree(newinfo->chainstack); + newinfo->chainstack = NULL; + return -ENOMEM; + } + } + + cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s))); + if (!cl_s) + return -ENOMEM; + i = 0; /* the i'th udc */ + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_get_udc_positions, newinfo, &i, cl_s); + /* sanity check */ + if (i != udc_cnt) { + vfree(cl_s); + return -EFAULT; + } + } + + /* Check for loops */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + if (newinfo->hook_entry[i]) + if (check_chainloops(newinfo->hook_entry[i], + cl_s, udc_cnt, i, newinfo->entries)) { + vfree(cl_s); + return -EINVAL; + } + + /* we now know the following (along with E=mc²): + * - the nr of entries in each chain is right + * - the size of the allocated space is right + * - all valid hooks have a corresponding chain + * - there are no loops + * - wrong data can still be on the level of a single entry + * - could be there are jumps to places that are not the + * beginning of a chain. This can only occur in chains that + * are not accessible from any base chains, so we don't care. 
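+ * Per-entry validation of matches, watchers and targets happens next, in ebt_check_entry().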
+ */ + + /* used to know what we need to clean up if something goes wrong */ + i = 0; + ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); + if (ret != 0) { + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_cleanup_entry, net, &i); + } + vfree(cl_s); + return ret; +} + +/* called under write_lock */ +static void get_counters(const struct ebt_counter *oldcounters, + struct ebt_counter *counters, unsigned int nentries) +{ + int i, cpu; + struct ebt_counter *counter_base; + + /* counters of cpu 0 */ + memcpy(counters, oldcounters, + sizeof(struct ebt_counter) * nentries); + + /* add other counters to those of cpu 0 */ + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; + counter_base = COUNTER_BASE(oldcounters, nentries, cpu); + for (i = 0; i < nentries; i++) + ADD_COUNTER(counters[i], counter_base[i].bcnt, + counter_base[i].pcnt); + } +} + +static int do_replace_finish(struct net *net, struct ebt_replace *repl, + struct ebt_table_info *newinfo) +{ + int ret; + struct ebt_counter *counterstmp = NULL; + /* used to be able to unlock earlier */ + struct ebt_table_info *table; + struct ebt_table *t; + + /* the user wants counters back + * the check on the size is done later, when we have the lock + */ + if (repl->num_counters) { + unsigned long size = repl->num_counters * sizeof(*counterstmp); + counterstmp = vmalloc(size); + if (!counterstmp) + return -ENOMEM; + } + + newinfo->chainstack = NULL; + ret = ebt_verify_pointers(repl, newinfo); + if (ret != 0) + goto free_counterstmp; + + ret = translate_table(net, repl->name, newinfo); + + if (ret != 0) + goto free_counterstmp; + + t = find_table_lock(net, repl->name, &ret, &ebt_mutex); + if (!t) { + ret = -ENOENT; + goto free_iterate; + } + + if (repl->valid_hooks != t->valid_hooks) { + ret = -EINVAL; + goto free_unlock; + } + + if (repl->num_counters && repl->num_counters != t->private->nentries) { + ret = -EINVAL; + goto free_unlock; + } + + /* we have the mutex lock, so no danger in reading this pointer */ + table = t->private; + /* make sure the table can only be rmmod'ed if it contains no rules */ + if (!table->nentries && newinfo->nentries && !try_module_get(t->me)) { + ret = -ENOENT; + goto free_unlock; + } else if (table->nentries && !newinfo->nentries) + module_put(t->me); + /* we need an atomic snapshot of the counters */ + write_lock_bh(&t->lock); + if (repl->num_counters) + get_counters(t->private->counters, counterstmp, + t->private->nentries); + + t->private = newinfo; + write_unlock_bh(&t->lock); + mutex_unlock(&ebt_mutex); + /* so, a user can change the chains while having messed up her counter + * allocation. Only reason why this is done is because this way the lock + * is held only once, while this doesn't bring the kernel into a + * dangerous state. 
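+ * The counter snapshot above was taken under the write lock, so it is consistent with the table that has just been replaced.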
+ */ + if (repl->num_counters && + copy_to_user(repl->counters, counterstmp, + repl->num_counters * sizeof(struct ebt_counter))) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); + } + + /* decrease module count and free resources */ + EBT_ENTRY_ITERATE(table->entries, table->entries_size, + ebt_cleanup_entry, net, NULL); + + vfree(table->entries); + ebt_free_table_info(table); + vfree(table); + vfree(counterstmp); + + audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, + AUDIT_XT_OP_REPLACE, GFP_KERNEL); + return 0; + +free_unlock: + mutex_unlock(&ebt_mutex); +free_iterate: + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_cleanup_entry, net, NULL); +free_counterstmp: + vfree(counterstmp); + /* can be initialized in translate_table() */ + ebt_free_table_info(newinfo); + return ret; +} + +/* replace the table */ +static int do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret, countersize; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) + return -EINVAL; + + if (tmp.entries_size == 0) + return -EINVAL; + + /* overflow check */ + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + tmp.name[sizeof(tmp.name) - 1] = 0; + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + ret = -EFAULT; + goto free_entries; + } + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +} + +static void __ebt_unregister_table(struct net *net, struct ebt_table *table) +{ + mutex_lock(&ebt_mutex); + list_del(&table->list); + mutex_unlock(&ebt_mutex); + audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, + ebt_cleanup_entry, net, NULL); + if (table->private->nentries) + module_put(table->me); + vfree(table->private->entries); + ebt_free_table_info(table->private); + vfree(table->private); + kfree(table); +} + +int ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops, struct ebt_table **res) +{ + struct ebt_table_info *newinfo; + struct ebt_table *t, *table; + struct ebt_replace_kernel *repl; + int ret, i, countersize; + void *p; + + if (input_table == NULL || (repl = input_table->table) == NULL || + repl->entries == NULL || repl->entries_size == 0 || + repl->counters != NULL || input_table->private != NULL) + return -EINVAL; + + /* Don't add one table to multiple lists. 
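Work on a private copy of the caller's template instead.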
*/ + table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; + } + + countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + ret = -ENOMEM; + if (!newinfo) + goto free_table; + + p = vmalloc(repl->entries_size); + if (!p) + goto free_newinfo; + + memcpy(p, repl->entries, repl->entries_size); + newinfo->entries = p; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + /* fill in newinfo and parse the entries */ + newinfo->chainstack = NULL; + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((repl->valid_hooks & (1 << i)) == 0) + newinfo->hook_entry[i] = NULL; + else + newinfo->hook_entry[i] = p + + ((char *)repl->hook_entry[i] - repl->entries); + } + ret = translate_table(net, repl->name, newinfo); + if (ret != 0) + goto free_chainstack; + + table->private = newinfo; + rwlock_init(&table->lock); + mutex_lock(&ebt_mutex); + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + goto free_unlock; + } + } + + /* Hold a reference count if the chains aren't empty */ + if (newinfo->nentries && !try_module_get(table->me)) { + ret = -ENOENT; + goto free_unlock; + } + list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); + mutex_unlock(&ebt_mutex); + + WRITE_ONCE(*res, table); + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret) { + __ebt_unregister_table(net, table); + *res = NULL; + } + + audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, + AUDIT_XT_OP_REGISTER, GFP_KERNEL); + return ret; +free_unlock: + mutex_unlock(&ebt_mutex); +free_chainstack: + ebt_free_table_info(newinfo); + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); +free_table: + kfree(table); +out: + return ret; +} + +static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +{ + struct ebt_table *t; + + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + if (strcmp(t->name, name) == 0) { + mutex_unlock(&ebt_mutex); + return t; + } + } + + mutex_unlock(&ebt_mutex); + return NULL; +} + +void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops) +{ + struct ebt_table *table = __ebt_find_table(net, name); + + if (table) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(ebt_unregister_table_pre_exit); + +void ebt_unregister_table(struct net *net, struct ebt_table *table) +{ + __ebt_unregister_table(net, table); +} + +/* userspace just supplied us with counters */ +static int do_update_counters(struct net *net, const char *name, + struct ebt_counter __user *counters, + unsigned int num_counters, unsigned int len) +{ + int i, ret; + struct ebt_counter *tmp; + struct ebt_table *t; + + if (num_counters == 0) + return -EINVAL; + + tmp = vmalloc(array_size(num_counters, sizeof(*tmp))); + if (!tmp) + return -ENOMEM; + + t = find_table_lock(net, name, &ret, &ebt_mutex); + if (!t) + goto free_tmp; + + if (num_counters != t->private->nentries) { + ret = -EINVAL; + goto unlock_mutex; + } + + if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) { + ret = -EFAULT; + goto unlock_mutex; + } + + /* we want an atomic add of the counters */ + write_lock_bh(&t->lock); + + /* we add to the counters of the first cpu */ + for (i = 0; i < num_counters; i++) + 
ADD_COUNTER(t->private->counters[i], tmp[i].bcnt, tmp[i].pcnt); + + write_unlock_bh(&t->lock); + ret = 0; +unlock_mutex: + mutex_unlock(&ebt_mutex); +free_tmp: + vfree(tmp); + return ret; +} + +static int update_counters(struct net *net, sockptr_t arg, unsigned int len) +{ + struct ebt_replace hlp; + + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return -EINVAL; + + return do_update_counters(net, hlp.name, hlp.counters, + hlp.num_counters, len); +} + +static inline int ebt_obj_to_user(char __user *um, const char *_name, + const char *data, int entrysize, + int usersize, int datasize, u8 revision) +{ + char name[EBT_EXTENSION_MAXNAMELEN] = {0}; + + /* ebtables expects 31 bytes long names but xt_match names are 29 bytes + * long. Copy 29 bytes and fill remaining bytes with zeroes. + */ + strlcpy(name, _name, sizeof(name)); + if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) || + put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) || + put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) || + xt_data_to_user(um + entrysize, data, usersize, datasize, + XT_ALIGN(datasize))) + return -EFAULT; + + return 0; +} + +static inline int ebt_match_to_user(const struct ebt_entry_match *m, + const char *base, char __user *ubase) +{ + return ebt_obj_to_user(ubase + ((char *)m - base), + m->u.match->name, m->data, sizeof(*m), + m->u.match->usersize, m->match_size, + m->u.match->revision); +} + +static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) +{ + return ebt_obj_to_user(ubase + ((char *)w - base), + w->u.watcher->name, w->data, sizeof(*w), + w->u.watcher->usersize, w->watcher_size, + w->u.watcher->revision); +} + +static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, + char __user *ubase) +{ + int ret; + char __user *hlp; + const struct ebt_entry_target *t; + + if (e->bitmask == 0) { + /* special case !EBT_ENTRY_OR_ENTRIES */ + if (copy_to_user(ubase + ((char *)e - base), e, + sizeof(struct ebt_entries))) + return -EFAULT; + return 0; + } + + if (copy_to_user(ubase + ((char *)e - base), e, sizeof(*e))) + return -EFAULT; + + hlp = ubase + (((char *)e + e->target_offset) - base); + t = ebt_get_target_c(e); + + ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); + if (ret != 0) + return ret; + ret = EBT_WATCHER_ITERATE(e, ebt_watcher_to_user, base, ubase); + if (ret != 0) + return ret; + ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), + t->u.target->usersize, t->target_size, + t->u.target->revision); + if (ret != 0) + return ret; + + return 0; +} + +static int copy_counters_to_user(struct ebt_table *t, + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) +{ + struct ebt_counter *counterstmp; + int ret = 0; + + /* userspace might not need the counters */ + if (num_counters == 0) + return 0; + + if (num_counters != nentries) + return -EINVAL; + + counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp))); + if (!counterstmp) + return -ENOMEM; + + write_lock_bh(&t->lock); + get_counters(oldcounters, counterstmp, nentries); + write_unlock_bh(&t->lock); + + if (copy_to_user(user, counterstmp, + nentries * sizeof(struct ebt_counter))) + ret = -EFAULT; + vfree(counterstmp); + return ret; +} + +/* called with ebt_mutex locked */ +static int copy_everything_to_user(struct ebt_table *t, void __user *user, + const 
int *len, int cmd) +{ + struct ebt_replace tmp; + const struct ebt_counter *oldcounters; + unsigned int entries_size, nentries; + int ret; + char *entries; + + if (cmd == EBT_SO_GET_ENTRIES) { + entries_size = t->private->entries_size; + nentries = t->private->nentries; + entries = t->private->entries; + oldcounters = t->private->counters; + } else { + entries_size = t->table->entries_size; + nentries = t->table->nentries; + entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (*len != sizeof(struct ebt_replace) + entries_size + + (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) + return -EINVAL; + + if (tmp.nentries != nentries) + return -EINVAL; + + if (tmp.entries_size != entries_size) + return -EINVAL; + + ret = copy_counters_to_user(t, oldcounters, tmp.counters, + tmp.num_counters, nentries); + if (ret) + return ret; + + /* set the match/watcher/target names right */ + return EBT_ENTRY_ITERATE(entries, entries_size, + ebt_entry_to_user, entries, tmp.entries); +} + +#ifdef CONFIG_COMPAT +/* 32 bit-userspace compatibility definitions. */ +struct compat_ebt_replace { + char name[EBT_TABLE_MAXNAMELEN]; + compat_uint_t valid_hooks; + compat_uint_t nentries; + compat_uint_t entries_size; + /* start of the chains */ + compat_uptr_t hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + compat_uint_t num_counters; + /* where the kernel will put the old counters. */ + compat_uptr_t counters; + compat_uptr_t entries; +}; + +/* struct ebt_entry_match, _target and _watcher have same layout */ +struct compat_ebt_entry_mwt { + union { + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + u8 revision; + }; + compat_uptr_t ptr; + } u; + compat_uint_t match_size; + compat_uint_t data[] __aligned(__alignof__(struct compat_ebt_replace)); +}; + +/* account for possible padding between match_size and ->data */ +static int ebt_compat_entry_padsize(void) +{ + BUILD_BUG_ON(sizeof(struct ebt_entry_match) < + sizeof(struct compat_ebt_entry_mwt)); + return (int) sizeof(struct ebt_entry_match) - + sizeof(struct compat_ebt_entry_mwt); +} + +static int ebt_compat_match_offset(const struct xt_match *match, + unsigned int userlen) +{ + /* ebt_among needs special handling. The kernel .matchsize is + * set to -1 at registration time; at runtime an EBT_ALIGN()ed + * value is expected. + * Example: userspace sends 4500, ebt_among.c wants 4504. 
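+ * For that case the offset is derived from the user-supplied length instead of xt_compat_match_offset().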
+ */ + if (unlikely(match->matchsize == -1)) + return XT_ALIGN(userlen) - COMPAT_XT_ALIGN(userlen); + return xt_compat_match_offset(match); +} + +static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, + unsigned int *size) +{ + const struct xt_match *match = m->u.match; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = ebt_compat_match_offset(match, m->match_size); + compat_uint_t msize = m->match_size - off; + + if (WARN_ON(off >= m->match_size)) + return -EINVAL; + + if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) || + put_user(match->revision, &cm->u.revision) || + put_user(msize, &cm->match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user(cm->data, m->data)) + return -EFAULT; + } else { + if (xt_data_to_user(cm->data, m->data, match->usersize, msize, + COMPAT_XT_ALIGN(msize))) + return -EFAULT; + } + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += msize; + return 0; +} + +static int compat_target_to_user(struct ebt_entry_target *t, + void __user **dstptr, + unsigned int *size) +{ + const struct xt_target *target = t->u.target; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = xt_compat_target_offset(target); + compat_uint_t tsize = t->target_size - off; + + if (WARN_ON(off >= t->target_size)) + return -EINVAL; + + if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) || + put_user(target->revision, &cm->u.revision) || + put_user(tsize, &cm->match_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user(cm->data, t->data)) + return -EFAULT; + } else { + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, + COMPAT_XT_ALIGN(tsize))) + return -EFAULT; + } + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += tsize; + return 0; +} + +static int compat_watcher_to_user(struct ebt_entry_watcher *w, + void __user **dstptr, + unsigned int *size) +{ + return compat_target_to_user((struct ebt_entry_target *)w, + dstptr, size); +} + +static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, + unsigned int *size) +{ + struct ebt_entry_target *t; + struct ebt_entry __user *ce; + u32 watchers_offset, target_offset, next_offset; + compat_uint_t origsize; + int ret; + + if (e->bitmask == 0) { + if (*size < sizeof(struct ebt_entries)) + return -EINVAL; + if (copy_to_user(*dstptr, e, sizeof(struct ebt_entries))) + return -EFAULT; + + *dstptr += sizeof(struct ebt_entries); + *size -= sizeof(struct ebt_entries); + return 0; + } + + if (*size < sizeof(*ce)) + return -EINVAL; + + ce = *dstptr; + if (copy_to_user(ce, e, sizeof(*ce))) + return -EFAULT; + + origsize = *size; + *dstptr += sizeof(*ce); + + ret = EBT_MATCH_ITERATE(e, compat_match_to_user, dstptr, size); + if (ret) + return ret; + watchers_offset = e->watchers_offset - (origsize - *size); + + ret = EBT_WATCHER_ITERATE(e, compat_watcher_to_user, dstptr, size); + if (ret) + return ret; + target_offset = e->target_offset - (origsize - *size); + + t = ebt_get_target(e); + + ret = compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + + if (put_user(watchers_offset, &ce->watchers_offset) || + put_user(target_offset, &ce->target_offset) || + put_user(next_offset, &ce->next_offset)) + return -EFAULT; + + *size -= sizeof(*ce); + return 0; +} + +static int compat_calc_match(struct ebt_entry_match *m, int *off) +{ + *off += 
ebt_compat_match_offset(m->u.match, m->match_size); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_watcher(struct ebt_entry_watcher *w, int *off) +{ + *off += xt_compat_target_offset(w->u.watcher); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_entry(const struct ebt_entry *e, + const struct ebt_table_info *info, + const void *base, + struct compat_ebt_replace *newinfo) +{ + const struct ebt_entry_target *t; + unsigned int entry_offset; + int off, ret, i; + + if (e->bitmask == 0) + return 0; + + off = 0; + entry_offset = (void *)e - base; + + EBT_MATCH_ITERATE(e, compat_calc_match, &off); + EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); + + t = ebt_get_target_c(e); + + off += xt_compat_target_offset(t->u.target); + off += ebt_compat_entry_padsize(); + + newinfo->entries_size -= off; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + const void *hookptr = info->hook_entry[i]; + if (info->hook_entry[i] && + (e < (struct ebt_entry *)(base - hookptr))) { + newinfo->hook_entry[i] -= off; + pr_debug("0x%08X -> 0x%08X\n", + newinfo->hook_entry[i] + off, + newinfo->hook_entry[i]); + } + } + + return 0; +} + +static int ebt_compat_init_offsets(unsigned int number) +{ + if (number > INT_MAX) + return -EINVAL; + + /* also count the base chain policies */ + number += NF_BR_NUMHOOKS; + + return xt_compat_init_offsets(NFPROTO_BRIDGE, number); +} + +static int compat_table_info(const struct ebt_table_info *info, + struct compat_ebt_replace *newinfo) +{ + unsigned int size = info->entries_size; + const void *entries = info->entries; + int ret; + + newinfo->entries_size = size; + ret = ebt_compat_init_offsets(info->nentries); + if (ret) + return ret; + + return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, + entries, newinfo); +} + +static int compat_copy_everything_to_user(struct ebt_table *t, + void __user *user, int *len, int cmd) +{ + struct compat_ebt_replace repl, tmp; + struct ebt_counter *oldcounters; + struct ebt_table_info tinfo; + int ret; + void __user *pos; + + memset(&tinfo, 0, sizeof(tinfo)); + + if (cmd == EBT_SO_GET_ENTRIES) { + tinfo.entries_size = t->private->entries_size; + tinfo.nentries = t->private->nentries; + tinfo.entries = t->private->entries; + oldcounters = t->private->counters; + } else { + tinfo.entries_size = t->table->entries_size; + tinfo.nentries = t->table->nentries; + tinfo.entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (tmp.nentries != tinfo.nentries || + (tmp.num_counters && tmp.num_counters != tinfo.nentries)) + return -EINVAL; + + memcpy(&repl, &tmp, sizeof(repl)); + if (cmd == EBT_SO_GET_ENTRIES) + ret = compat_table_info(t->private, &repl); + else + ret = compat_table_info(&tinfo, &repl); + if (ret) + return ret; + + if (*len != sizeof(tmp) + repl.entries_size + + (tmp.num_counters? 
tinfo.nentries * sizeof(struct ebt_counter): 0)) { + pr_err("wrong size: *len %d, entries_size %u, replsz %d\n", + *len, tinfo.entries_size, repl.entries_size); + return -EINVAL; + } + + /* userspace might not need the counters */ + ret = copy_counters_to_user(t, oldcounters, compat_ptr(tmp.counters), + tmp.num_counters, tinfo.nentries); + if (ret) + return ret; + + pos = compat_ptr(tmp.entries); + return EBT_ENTRY_ITERATE(tinfo.entries, tinfo.entries_size, + compat_copy_entry_to_user, &pos, &tmp.entries_size); +} + +struct ebt_entries_buf_state { + char *buf_kern_start; /* kernel buffer to copy (translated) data to */ + u32 buf_kern_len; /* total size of kernel buffer */ + u32 buf_kern_offset; /* amount of data copied so far */ + u32 buf_user_offset; /* read position in userspace buffer */ +}; + +static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) +{ + state->buf_kern_offset += sz; + return state->buf_kern_offset >= sz ? 0 : -EINVAL; +} + +static int ebt_buf_add(struct ebt_entries_buf_state *state, + const void *data, unsigned int sz) +{ + if (state->buf_kern_start == NULL) + goto count_only; + + if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len)) + return -EINVAL; + + memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); + + count_only: + state->buf_user_offset += sz; + return ebt_buf_count(state, sz); +} + +static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) +{ + char *b = state->buf_kern_start; + + if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len)) + return -EINVAL; + + if (b != NULL && sz > 0) + memset(b + state->buf_kern_offset, 0, sz); + /* do not adjust ->buf_user_offset here, we added kernel-side padding */ + return ebt_buf_count(state, sz); +} + +enum compat_mwt { + EBT_COMPAT_MATCH, + EBT_COMPAT_WATCHER, + EBT_COMPAT_TARGET, +}; + +static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, + enum compat_mwt compat_mwt, + struct ebt_entries_buf_state *state, + const unsigned char *base) +{ + char name[EBT_EXTENSION_MAXNAMELEN]; + struct xt_match *match; + struct xt_target *wt; + void *dst = NULL; + int off, pad = 0; + unsigned int size_kern, match_size = mwt->match_size; + + if (strscpy(name, mwt->u.name, sizeof(name)) < 0) + return -EINVAL; + + if (state->buf_kern_start) + dst = state->buf_kern_start + state->buf_kern_offset; + + switch (compat_mwt) { + case EBT_COMPAT_MATCH: + match = xt_request_find_match(NFPROTO_BRIDGE, name, + mwt->u.revision); + if (IS_ERR(match)) + return PTR_ERR(match); + + off = ebt_compat_match_offset(match, match_size); + if (dst) { + if (match->compat_from_user) + match->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = match->matchsize; + if (unlikely(size_kern == -1)) + size_kern = match_size; + module_put(match->me); + break; + case EBT_COMPAT_WATCHER: + case EBT_COMPAT_TARGET: + wt = xt_request_find_target(NFPROTO_BRIDGE, name, + mwt->u.revision); + if (IS_ERR(wt)) + return PTR_ERR(wt); + off = xt_compat_target_offset(wt); + + if (dst) { + if (wt->compat_from_user) + wt->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = wt->targetsize; + module_put(wt->me); + break; + + default: + return -EINVAL; + } + + state->buf_kern_offset += match_size + off; + state->buf_user_offset += match_size; + pad = XT_ALIGN(size_kern) - size_kern; + + if (pad > 0 && dst) { + if (WARN_ON(state->buf_kern_len <= pad)) + return -EINVAL; + if 
(WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad)) + return -EINVAL; + memset(dst + size_kern, 0, pad); + } + return off + match_size; +} + +/* return size of all matches, watchers or target, including necessary + * alignment and padding. + */ +static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, + unsigned int size_left, enum compat_mwt type, + struct ebt_entries_buf_state *state, const void *base) +{ + const char *buf = (const char *)match32; + int growth = 0; + + if (size_left == 0) + return 0; + + do { + struct ebt_entry_match *match_kern; + int ret; + + if (size_left < sizeof(*match32)) + return -EINVAL; + + match_kern = (struct ebt_entry_match *) state->buf_kern_start; + if (match_kern) { + char *tmp; + tmp = state->buf_kern_start + state->buf_kern_offset; + match_kern = (struct ebt_entry_match *) tmp; + } + ret = ebt_buf_add(state, buf, sizeof(*match32)); + if (ret < 0) + return ret; + size_left -= sizeof(*match32); + + /* add padding before match->data (if any) */ + ret = ebt_buf_add_pad(state, ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + + if (match32->match_size > size_left) + return -EINVAL; + + size_left -= match32->match_size; + + ret = compat_mtw_from_user(match32, type, state, base); + if (ret < 0) + return ret; + + if (WARN_ON(ret < match32->match_size)) + return -EINVAL; + growth += ret - match32->match_size; + growth += ebt_compat_entry_padsize(); + + buf += sizeof(*match32); + buf += match32->match_size; + + if (match_kern) + match_kern->match_size = ret; + + match32 = (struct compat_ebt_entry_mwt *) buf; + } while (size_left); + + return growth; +} + +/* called for all ebt_entry structures. */ +static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, + unsigned int *total, + struct ebt_entries_buf_state *state) +{ + unsigned int i, j, startoff, next_expected_off, new_offset = 0; + /* stores match/watchers/targets & offset of next struct ebt_entry: */ + unsigned int offsets[4]; + unsigned int *offsets_update = NULL; + int ret; + char *buf_start; + + if (*total < sizeof(struct ebt_entries)) + return -EINVAL; + + if (!entry->bitmask) { + *total -= sizeof(struct ebt_entries); + return ebt_buf_add(state, entry, sizeof(struct ebt_entries)); + } + if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) + return -EINVAL; + + startoff = state->buf_user_offset; + /* pull in most part of ebt_entry, it does not need to be changed. */ + ret = ebt_buf_add(state, entry, + offsetof(struct ebt_entry, watchers_offset)); + if (ret < 0) + return ret; + + offsets[0] = sizeof(struct ebt_entry); /* matches come first */ + memcpy(&offsets[1], &entry->offsets, sizeof(entry->offsets)); + + if (state->buf_kern_start) { + buf_start = state->buf_kern_start + state->buf_kern_offset; + offsets_update = (unsigned int *) buf_start; + } + ret = ebt_buf_add(state, &offsets[1], + sizeof(offsets) - sizeof(offsets[0])); + if (ret < 0) + return ret; + buf_start = (char *) entry; + /* 0: matches offset, always follows ebt_entry. + * 1: watchers offset, from ebt_entry structure + * 2: target offset, from ebt_entry structure + * 3: next ebt_entry offset, from ebt_entry structure + * + * offsets are relative to beginning of struct ebt_entry (i.e., 0). 
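+ * The checks below reject decreasing offsets and offsets that point outside the remaining data.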
+ */ + for (i = 0; i < 4 ; ++i) { + if (offsets[i] > *total) + return -EINVAL; + + if (i < 3 && offsets[i] == *total) + return -EINVAL; + + if (i == 0) + continue; + if (offsets[i-1] > offsets[i]) + return -EINVAL; + } + + for (i = 0, j = 1 ; j < 4 ; j++, i++) { + struct compat_ebt_entry_mwt *match32; + unsigned int size; + char *buf = buf_start + offsets[i]; + + if (offsets[i] > offsets[j]) + return -EINVAL; + + match32 = (struct compat_ebt_entry_mwt *) buf; + size = offsets[j] - offsets[i]; + ret = ebt_size_mwt(match32, size, i, state, base); + if (ret < 0) + return ret; + new_offset += ret; + if (offsets_update && new_offset) { + pr_debug("change offset %d to %d\n", + offsets_update[i], offsets[j] + new_offset); + offsets_update[i] = offsets[j] + new_offset; + } + } + + if (state->buf_kern_start == NULL) { + unsigned int offset = buf_start - (char *) base; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); + if (ret < 0) + return ret; + } + + next_expected_off = state->buf_user_offset - startoff; + if (next_expected_off != entry->next_offset) + return -EINVAL; + + if (*total < entry->next_offset) + return -EINVAL; + *total -= entry->next_offset; + return 0; +} + +/* repl->entries_size is the size of the ebt_entry blob in userspace. + * It might need more memory when copied to a 64 bit kernel in case + * userspace is 32-bit. So, first task: find out how much memory is needed. + * + * Called before validation is performed. + */ +static int compat_copy_entries(unsigned char *data, unsigned int size_user, + struct ebt_entries_buf_state *state) +{ + unsigned int size_remaining = size_user; + int ret; + + ret = EBT_ENTRY_ITERATE(data, size_user, size_entry_mwt, data, + &size_remaining, state); + if (ret < 0) + return ret; + + if (size_remaining) + return -EINVAL; + + return state->buf_kern_offset; +} + + +static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, + sockptr_t arg, unsigned int len) +{ + struct compat_ebt_replace tmp; + int i; + + if (len < sizeof(tmp)) + return -EINVAL; + + if (copy_from_sockptr(&tmp, arg, sizeof(tmp))) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) + return -EINVAL; + + if (tmp.entries_size == 0) + return -EINVAL; + + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); + + /* starting with hook_entry, 32 vs. 
64 bit structures are different */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + repl->hook_entry[i] = compat_ptr(tmp.hook_entry[i]); + + repl->num_counters = tmp.num_counters; + repl->counters = compat_ptr(tmp.counters); + repl->entries = compat_ptr(tmp.entries); + return 0; +} + +static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) +{ + int ret, i, countersize, size64; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + struct ebt_entries_buf_state state; + void *entries_tmp; + + ret = compat_copy_ebt_replace_from_user(&tmp, arg, len); + if (ret) { + /* try real handler in case userland supplied needed padding */ + if (ret == -EINVAL && do_replace(net, arg, len) == 0) + ret = 0; + return ret; + } + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + memset(&state, 0, sizeof(state)); + + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + ret = -EFAULT; + goto free_entries; + } + + entries_tmp = newinfo->entries; + + xt_compat_lock(NFPROTO_BRIDGE); + + ret = ebt_compat_init_offsets(tmp.nentries); + if (ret < 0) + goto out_unlock; + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + if (ret < 0) + goto out_unlock; + + pr_debug("tmp.entries_size %d, kern off %d, user off %d delta %d\n", + tmp.entries_size, state.buf_kern_offset, state.buf_user_offset, + xt_compat_calc_jump(NFPROTO_BRIDGE, tmp.entries_size)); + + size64 = ret; + newinfo->entries = vmalloc(size64); + if (!newinfo->entries) { + vfree(entries_tmp); + ret = -ENOMEM; + goto out_unlock; + } + + memset(&state, 0, sizeof(state)); + state.buf_kern_start = newinfo->entries; + state.buf_kern_len = size64; + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + if (WARN_ON(ret < 0)) { + vfree(entries_tmp); + goto out_unlock; + } + + vfree(entries_tmp); + tmp.entries_size = size64; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + char __user *usrptr; + if (tmp.hook_entry[i]) { + unsigned int delta; + usrptr = (char __user *) tmp.hook_entry[i]; + delta = usrptr - tmp.entries; + usrptr += xt_compat_calc_jump(NFPROTO_BRIDGE, delta); + tmp.hook_entry[i] = (struct ebt_entries __user *)usrptr; + } + } + + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + goto free_entries; +} + +static int compat_update_counters(struct net *net, sockptr_t arg, + unsigned int len) +{ + struct compat_ebt_replace hlp; + + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + + /* try real handler in case userland supplied needed padding */ + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return update_counters(net, arg, len); + + return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), + hlp.num_counters, len); +} + +static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, + void __user *user, int *len) +{ + int ret; + struct compat_ebt_replace tmp; + struct ebt_table *t; + struct net *net = sock_net(sk); + + if ((cmd == EBT_SO_GET_INFO || cmd == 
EBT_SO_GET_INIT_INFO) && + *len != sizeof(struct compat_ebt_replace)) + return -EINVAL; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + tmp.name[sizeof(tmp.name) - 1] = '\0'; + + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + xt_compat_lock(NFPROTO_BRIDGE); + switch (cmd) { + case EBT_SO_GET_INFO: + tmp.nentries = t->private->nentries; + ret = compat_table_info(t->private, &tmp); + if (ret) + goto out; + tmp.valid_hooks = t->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_INIT_INFO: + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + /* try real handler first in case of userland-side padding. + * in case we are dealing with an 'ordinary' 32 bit binary + * without 64bit compatibility padding, this will fail right + * after copy_from_user when the *len argument is validated. + * + * the compat_ variant needs to do one pass over the kernel + * data set to adjust for size differences before it the check. + */ + if (copy_everything_to_user(t, user, len, cmd) == 0) + ret = 0; + else + ret = compat_copy_everything_to_user(t, user, len, cmd); + break; + default: + ret = -EINVAL; + } + out: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + mutex_unlock(&ebt_mutex); + return ret; +} +#endif + +static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + struct net *net = sock_net(sk); + struct ebt_replace tmp; + struct ebt_table *t; + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +#ifdef CONFIG_COMPAT + /* try real handler in case userland supplied needed padding */ + if (in_compat_syscall() && + ((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) || + *len != sizeof(tmp))) + return compat_do_ebt_get_ctl(sk, cmd, user, len); +#endif + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + tmp.name[sizeof(tmp.name) - 1] = '\0'; + + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + switch (cmd) { + case EBT_SO_GET_INFO: + case EBT_SO_GET_INIT_INFO: + if (*len != sizeof(struct ebt_replace)) { + ret = -EINVAL; + mutex_unlock(&ebt_mutex); + break; + } + if (cmd == EBT_SO_GET_INFO) { + tmp.nentries = t->private->nentries; + tmp.entries_size = t->private->entries_size; + tmp.valid_hooks = t->valid_hooks; + } else { + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + } + mutex_unlock(&ebt_mutex); + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + ret = copy_everything_to_user(t, user, len, cmd); + mutex_unlock(&ebt_mutex); + break; + + default: + mutex_unlock(&ebt_mutex); + ret = -EINVAL; + } + + return ret; +} + +static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, + unsigned int len) +{ + struct net *net = sock_net(sk); + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case EBT_SO_SET_ENTRIES: +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(net, arg, len); + else +#endif + ret = do_replace(net, arg, len); + break; + case 
EBT_SO_SET_COUNTERS: +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_update_counters(net, arg, len); + else +#endif + ret = update_counters(net, arg, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + +static struct nf_sockopt_ops ebt_sockopts = { + .pf = PF_INET, + .set_optmin = EBT_BASE_CTL, + .set_optmax = EBT_SO_SET_MAX + 1, + .set = do_ebt_set_ctl, + .get_optmin = EBT_BASE_CTL, + .get_optmax = EBT_SO_GET_MAX + 1, + .get = do_ebt_get_ctl, + .owner = THIS_MODULE, +}; + +static int __init ebtables_init(void) +{ + int ret; + + ret = xt_register_target(&ebt_standard_target); + if (ret < 0) + return ret; + ret = nf_register_sockopt(&ebt_sockopts); + if (ret < 0) { + xt_unregister_target(&ebt_standard_target); + return ret; + } + + return 0; +} + +static void __exit ebtables_fini(void) +{ + nf_unregister_sockopt(&ebt_sockopts); + xt_unregister_target(&ebt_standard_target); +} + +EXPORT_SYMBOL(ebt_register_table); +EXPORT_SYMBOL(ebt_unregister_table); +EXPORT_SYMBOL(ebt_do_table); +module_init(ebtables_init); +module_exit(ebtables_fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c new file mode 100644 index 000000000..d14b2dbbd --- /dev/null +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/icmp.h> +#include <linux/sysctl.h> +#include <net/route.h> +#include <net/ip.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_bridge.h> + +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +#include "../br_private.h" + +/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff + * has been linearized or cloned. 
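+ * Cloned or linearized skbs fall through to the slow path below, which re-fragments based on frag_max_size.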
+ */ +static int nf_br_ip_fragment(struct net *net, struct sock *sk, + struct sk_buff *skb, + struct nf_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_bridge_frag_data *data, + struct sk_buff *)) +{ + int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + unsigned int hlen, ll_rs, mtu; + ktime_t tstamp = skb->tstamp; + struct ip_frag_state state; + struct iphdr *iph; + int err = 0; + + /* for offloaded checksums cleanup checksum before fragmentation */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto blackhole; + + iph = ip_hdr(skb); + + /* + * Setup starting values + */ + + hlen = iph->ihl * 4; + frag_max_size -= hlen; + ll_rs = LL_RESERVED_SPACE(skb->dev); + mtu = skb->dev->mtu; + + if (skb_has_frag_list(skb)) { + unsigned int first_len = skb_pagelen(skb); + struct ip_fraglist_iter iter; + struct sk_buff *frag; + + if (first_len - hlen > mtu || + skb_headroom(skb) < ll_rs) + goto blackhole; + + if (skb_cloned(skb)) + goto slow_path; + + skb_walk_frags(skb, frag) { + if (frag->len > mtu || + skb_headroom(frag) < hlen + ll_rs) + goto blackhole; + + if (skb_shared(frag)) + goto slow_path; + } + + ip_fraglist_init(skb, iph, hlen, &iter); + + for (;;) { + if (iter.frag) + ip_fraglist_prepare(skb, &iter); + + skb->tstamp = tstamp; + err = output(net, sk, data, skb); + if (err || !iter.frag) + break; + + skb = ip_fraglist_next(&iter); + } + + if (!err) + return 0; + + kfree_skb_list(iter.frag); + + return err; + } +slow_path: + /* This is a linearized skbuff, the original geometry is lost for us. + * This may also be a clone skbuff, we could preserve the geometry for + * the copies but probably not worth the effort. + */ + ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); + + while (state.left > 0) { + struct sk_buff *skb2; + + skb2 = ip_frag_next(skb, &state); + if (IS_ERR(skb2)) { + err = PTR_ERR(skb2); + goto blackhole; + } + + skb2->tstamp = tstamp; + err = output(net, sk, data, skb2); + if (err) + goto blackhole; + } + consume_skb(skb); + return err; + +blackhole: + kfree_skb(skb); + return 0; +} + +/* ip_defrag() expects IPCB() in place. 
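The bridge cb is saved and zeroed before defragmentation and restored afterwards.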
*/ +static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, + size_t inet_skb_parm_size) +{ + memcpy(cb, skb->cb, sizeof(*cb)); + memset(skb->cb, 0, inet_skb_parm_size); +} + +static void br_skb_cb_restore(struct sk_buff *skb, + const struct br_input_skb_cb *cb, + u16 fragsz) +{ + memcpy(skb->cb, cb, sizeof(*cb)); + BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; +} + +static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, + const struct nf_hook_state *state) +{ + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; + enum ip_conntrack_info ctinfo; + struct br_input_skb_cb cb; + const struct nf_conn *ct; + int err; + + if (!ip_is_fragment(ip_hdr(skb))) + return NF_ACCEPT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + + br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); + local_bh_disable(); + err = ip_defrag(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + local_bh_enable(); + if (!err) { + br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); + skb->ignore_df = 1; + return NF_ACCEPT; + } + + return NF_STOLEN; +} + +static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; + enum ip_conntrack_info ctinfo; + struct br_input_skb_cb cb; + const struct nf_conn *ct; + int err; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + + br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); + + err = nf_ct_frag6_gather(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + /* queued */ + if (err == -EINPROGRESS) + return NF_STOLEN; + + br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); + return err == 0 ? 
NF_ACCEPT : NF_DROP; +#else + return NF_ACCEPT; +#endif +} + +static int nf_ct_br_ip_check(const struct sk_buff *skb) +{ + const struct iphdr *iph; + int nhoff, len; + + nhoff = skb_network_offset(skb); + iph = ip_hdr(skb); + if (iph->ihl < 5 || + iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + if (skb->len < nhoff + len || + len < (iph->ihl * 4)) + return -1; + + return 0; +} + +static int nf_ct_br_ipv6_check(const struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + int nhoff, len; + + nhoff = skb_network_offset(skb); + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return -1; + + len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; + if (skb->len < len) + return -1; + + return 0; +} + +static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_hook_state bridge_state = *state; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u32 len; + int ret; + + ct = nf_ct_get(skb, &ctinfo); + if ((ct && !nf_ct_is_template(ct)) || + ctinfo == IP_CT_UNTRACKED) + return NF_ACCEPT; + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return NF_ACCEPT; + + len = ntohs(ip_hdr(skb)->tot_len); + if (pskb_trim_rcsum(skb, len)) + return NF_ACCEPT; + + if (nf_ct_br_ip_check(skb)) + return NF_ACCEPT; + + bridge_state.pf = NFPROTO_IPV4; + ret = nf_ct_br_defrag4(skb, &bridge_state); + break; + case htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return NF_ACCEPT; + + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + if (pskb_trim_rcsum(skb, len)) + return NF_ACCEPT; + + if (nf_ct_br_ipv6_check(skb)) + return NF_ACCEPT; + + bridge_state.pf = NFPROTO_IPV6; + ret = nf_ct_br_defrag6(skb, &bridge_state); + break; + default: + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + return NF_ACCEPT; + } + + if (ret != NF_ACCEPT) + return ret; + + return nf_conntrack_in(skb, &bridge_state); +} + +static void nf_ct_bridge_frag_save(struct sk_buff *skb, + struct nf_bridge_frag_data *data) +{ + if (skb_vlan_tag_present(skb)) { + data->vlan_present = true; + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + } else { + data->vlan_present = false; + } + skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); +} + +static unsigned int +nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, + int (*output)(struct net *, struct sock *sk, + const struct nf_bridge_frag_data *data, + struct sk_buff *)) +{ + struct nf_bridge_frag_data data; + + if (!BR_INPUT_SKB_CB(skb)->frag_max_size) + return NF_ACCEPT; + + nf_ct_bridge_frag_save(skb, &data); + switch (skb->protocol) { + case htons(ETH_P_IP): + nf_br_ip_fragment(state->net, state->sk, skb, &data, output); + break; + case htons(ETH_P_IPV6): + nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); + break; + default: + WARN_ON_ONCE(1); + return NF_DROP; + } + + return NF_STOLEN; +} + +/* Actually only slow path refragmentation needs this. 
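It restores the saved MAC header and VLAN tag on the fragment before transmission.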
*/ +static int nf_ct_bridge_frag_restore(struct sk_buff *skb, + const struct nf_bridge_frag_data *data) +{ + int err; + + err = skb_cow_head(skb, ETH_HLEN); + if (err) { + kfree_skb(skb); + return -ENOMEM; + } + if (data->vlan_present) + __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); + else if (skb_vlan_tag_present(skb)) + __vlan_hwaccel_clear_tag(skb); + + skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); + skb_reset_mac_header(skb); + + return 0; +} + +static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, + const struct nf_bridge_frag_data *data, + struct sk_buff *skb) +{ + int err; + + err = nf_ct_bridge_frag_restore(skb, data); + if (err < 0) + return err; + + return br_dev_queue_push_xmit(net, sk, skb); +} + +static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + int protoff; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + return nf_conntrack_confirm(skb); + + switch (skb->protocol) { + case htons(ETH_P_IP): + protoff = skb_network_offset(skb) + ip_hdrlen(skb); + break; + case htons(ETH_P_IPV6): { + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + __be16 frag_off; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) + return nf_conntrack_confirm(skb); + } + break; + default: + return NF_ACCEPT; + } + return nf_confirm(skb, protoff, ct, ctinfo); +} + +static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + int ret; + + ret = nf_ct_bridge_confirm(skb); + if (ret != NF_ACCEPT) + return ret; + + return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); +} + +static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { + { + .hook = nf_ct_bridge_pre, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = nf_ct_bridge_post, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, +}; + +static struct nf_ct_bridge_info bridge_info = { + .ops = nf_ct_bridge_hook_ops, + .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), + .me = THIS_MODULE, +}; + +static int __init nf_conntrack_l3proto_bridge_init(void) +{ + nf_ct_bridge_register(&bridge_info); + + return 0; +} + +static void __exit nf_conntrack_l3proto_bridge_fini(void) +{ + nf_ct_bridge_unregister(&bridge_info); +} + +module_init(nf_conntrack_l3proto_bridge_init); +module_exit(nf_conntrack_l3proto_bridge_fini); + +MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c new file mode 100644 index 000000000..1ad61d101 --- /dev/null +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_bridge.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_log.h> + +static void nf_log_bridge_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb, + in, out, loginfo, 
prefix); +} + +static struct nf_logger nf_bridge_logger __read_mostly = { + .name = "nf_log_bridge", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_bridge_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_bridge_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); +} + +static void __net_exit nf_log_bridge_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_bridge_logger); +} + +static struct pernet_operations nf_log_bridge_net_ops = { + .init = nf_log_bridge_net_init, + .exit = nf_log_bridge_net_exit, +}; + +static int __init nf_log_bridge_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_bridge_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); + return 0; +} + +static void __exit nf_log_bridge_exit(void) +{ + unregister_pernet_subsys(&nf_log_bridge_net_ops); + nf_log_unregister(&nf_bridge_logger); +} + +module_init(nf_log_bridge_init); +module_exit(nf_log_bridge_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter bridge packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 000000000..97805ec42 --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> +#include <linux/if_bridge.h> + +static const struct net_device * +nft_meta_get_bridge(const struct net_device *dev) +{ + if (dev && netif_is_bridge_port(dev)) + return netdev_master_upper_dev_get_rcu((struct net_device *)dev); + + return NULL; +} + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); + u32 *dest = ®s->data[priv->dreg]; + const struct net_device *br_dev; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + br_dev = nft_meta_get_bridge(in); + break; + case NFT_META_BRI_OIFNAME: + br_dev = nft_meta_get_bridge(out); + break; + case NFT_META_BRI_IIFPVID: { + u16 p_pvid; + + br_dev = nft_meta_get_bridge(in); + if (!br_dev || !br_vlan_enabled(br_dev)) + goto err; + + br_vlan_get_pvid_rcu(in, &p_pvid); + nft_reg_store16(dest, p_pvid); + return; + } + case NFT_META_BRI_IIFVPROTO: { + u16 p_proto; + + br_dev = nft_meta_get_bridge(in); + if (!br_dev || !br_vlan_enabled(br_dev)) + goto err; + + br_vlan_get_proto(br_dev, &p_proto); + nft_reg_store16(dest, htons(p_proto)); + return; + } + default: + return nft_meta_get_eval(expr, regs, pkt); + } + + strncpy((char *)dest, br_dev ? 
br_dev->name : "", IFNAMSIZ); + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + len = IFNAMSIZ; + break; + case NFT_META_BRI_IIFPVID: + case NFT_META_BRI_IIFVPROTO: + len = sizeof(u16); + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, + .dump = nft_meta_set_dump, + .validate = nft_meta_set_validate, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); +MODULE_DESCRIPTION("Support for bridge dedicated meta key"); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c new file mode 100644 index 000000000..deae2c9a0 --- /dev/null +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014 Pablo Neira Ayuso <pablo@netfilter.org> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv6.h> +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, 
eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); + + if (skb_vlan_tag_present(oldskb)) { + u16 vid = skb_vlan_tag_get(oldskb); + + __vlan_hwaccel_put_tag(nskb, oldskb->vlan_proto, vid); + } +} + +static int nft_bridge_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + +/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) + * or the bridge port (NF_BRIDGE PREROUTING). + */ +static void nft_reject_br_send_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_bridge_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv4.sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_forward(br_port_get_rcu(dev), nskb, false, true); +} + +static void nft_reject_br_send_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + __wsum csum; + u8 proto; + + if (!nft_bridge_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. 
*/ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) + return; + + proto = ip_hdr(oldskb)->protocol; + + if (!skb_csum_unnecessary(oldskb) && + nf_reject_verify_csum(proto) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + net->ipv4.sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_forward(br_port_get_rcu(dev), nskb, false, true); +} + +static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_bridge_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_forward(br_port_get_rcu(dev), nskb, false, true); +} + +static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + if (!nf_reject_verify_csum(proto)) + return true; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + + if (!nft_bridge_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. 
+ */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (!reject6_br_csum_ok(oldskb, hook)) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_forward(br_port_get_rcu(dev), nskb, false, true); +} + +static void nft_reject_bridge_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; + + switch (eth_hdr(pkt->skb)->h_proto) { + case htons(ETH_P_IP): + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; + case htons(ETH_P_IPV6): + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + nft_reject_icmpv6_code(priv->icmp_code)); + break; + } + break; + default: + /* No explicit way to reject this protocol, drop it. 
*/ + break; + } +out: + regs->verdict.code = NF_DROP; +} + +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN)); +} + +static int nft_reject_bridge_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code; + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + return 0; +} + +static int nft_reject_bridge_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + default: + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_reject_bridge_type; +static const struct nft_expr_ops nft_reject_bridge_ops = { + .type = &nft_reject_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_bridge_eval, + .init = nft_reject_bridge_init, + .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, +}; + +static struct nft_expr_type nft_reject_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "reject", + .ops = &nft_reject_bridge_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_bridge_module_init(void) +{ + return nft_register_expr(&nft_reject_bridge_type); +} + +static void __exit nft_reject_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_reject_bridge_type); +} + +module_init(nft_reject_bridge_module_init); +module_exit(nft_reject_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject"); +MODULE_DESCRIPTION("Reject packets from bridge via nftables"); |
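Note on the checksum step used by the IPv4 reject path above: nft_reject_br_send_v4_unreach() fills the ICMP destination-unreachable header and then computes the checksum with csum_partial() followed by csum_fold(). The sketch below is a minimal, standalone userspace illustration of the RFC 1071 one's-complement checksum those kernel helpers implement; it is not part of the patch, and the function name inet_checksum and the sample buffer are purely illustrative.

```c
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* RFC 1071 Internet checksum over a byte buffer, folded to 16 bits.
 * Roughly what csum_partial() + csum_fold() compute in the kernel path. */
static uint16_t inet_checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	/* Accumulate 16-bit words as they appear on the wire. */
	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)	/* odd trailing byte is padded with a zero */
		sum += (uint32_t)p[0] << 8;

	/* Fold carries back into the low 16 bits (csum_fold() equivalent). */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;	/* one's complement of the one's-complement sum */
}

int main(void)
{
	/* Hypothetical 8-byte ICMP header: type 3 (dest unreachable),
	 * code 1, checksum field zeroed, unused word zero. */
	uint8_t icmp[8] = { 3, 1, 0, 0, 0, 0, 0, 0 };

	printf("icmp checksum: 0x%04x\n", inet_checksum(icmp, sizeof(icmp)));
	return 0;
}
```

In the kernel code the same sum is taken over the ICMP header plus the copied payload (len + sizeof(struct icmphdr)) and the folded result is stored in icmph->checksum before the reply is pushed back out through br_forward(); the IPv6 path uses csum_ipv6_magic() instead, since ICMPv6 checksums also cover a pseudo-header.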